Mercurial > hg > extraction-interface
comparison models/extractapp.php @ 86:0141df465205 extractapp_dev
New: add chinese characters to pinyin
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 30 Apr 2015 10:52:05 +0200 |
parents | ecc4b22e9b05 |
children | fb5049fc5dd7 |
comparison
equal
deleted
inserted
replaced
85:966a36752c34 | 86:0141df465205 |
---|---|
74 // get book_meta from $this->get_section_metadata_by_sectionId_url | 74 // get book_meta from $this->get_section_metadata_by_sectionId_url |
75 $section_meta_url = $this->get_section_metadata_by_sectionId_url.$this->section_id; | 75 $section_meta_url = $this->get_section_metadata_by_sectionId_url.$this->section_id; |
76 $section_meta = json_decode(file_get_contents($section_meta_url), true); | 76 $section_meta = json_decode(file_get_contents($section_meta_url), true); |
77 | 77 |
78 $b = $section_meta['section']['book']; | 78 $b = $section_meta['section']['book']; |
79 | 79 |
80 $this->book_name = $b['name']; | |
81 $this->book_id = $b['id']; | |
82 /* | |
80 $book_meta = array(); | 83 $book_meta = array(); |
81 array_push($book_meta, array($b['id'],$b['name'],$b['level1'],$b['level2'], | 84 array_push($book_meta, array($b['id'],$b['name'],$b['level1'],$b['level2'], |
82 $b['period'],$b['dynasty'], $b['start_year'], $b['end_year'],$b['line'], | 85 $b['period'],$b['dynasty'], $b['start_year'], $b['end_year'],$b['line'], |
83 $b['volume'],$b['author'], $b['edition'],$b['in_jibengujiku'], $b['admin_type'])); // missing author,year,pagenumber | 86 $b['volume'],$b['author'], $b['edition'],$b['in_jibengujiku'], $b['admin_type'])); // missing author,year,pagenumber |
87 */ | |
88 /* | |
89 $book_meta = array('id'=>$b['id'],'name'=>$b['name'],'level1'=>$b['level1'],'level2'=>$b['level2'], | |
90 'period'=>$b['period'],'dynasty'=>$b['dynasty'],'start_year'=>$b['start_year'], 'end_year'=>$b['end_year'],'line'=>$b['line'], | |
91 'volume'=>$b['volume'],'author'=>$b['author'],'edition'=>$b['edition'], | |
92 'in_jibengujiku'=>$b['in_jibengujiku'],'admin_type'=>$b['admin_type']); // missing author,year,pagenumber | |
93 */ | |
94 | |
95 $book_meta->id = $b['id']; | |
96 $book_meta->name = $b['name']; | |
97 $book_meta->level1 = $b['level1']; | |
98 $book_meta->level2 = $b['level2']; | |
99 $book_meta->period = $b['period']; | |
100 $book_meta->dynasty = $b['dynasty']; | |
101 $book_meta->start_year = $b['start_year']; | |
102 $book_meta->end_year = $b['end_year']; | |
103 $book_meta->line = $b['line']; | |
104 $book_meta->volume = $b['volume']; | |
105 $book_meta->author = $b['author']; | |
106 $book_meta->edition = $b['edition']; | |
107 $book_meta->in_jibengujiku = $b['in_jibengujiku']; | |
108 $book_meta->admin_type = $b['admin_type']; | |
109 | |
110 | |
84 $this->book_meta = $book_meta; | 111 $this->book_meta = $book_meta; |
85 | 112 |
86 } | 113 } |
87 | 114 |
88 public function SetTextFromSectionId() { // remove $_postdata as input | 115 public function SetTextFromSectionId() { // remove $_postdata as input |
211 } | 238 } |
212 } | 239 } |
213 return true; | 240 return true; |
214 | 241 |
215 } | 242 } |
216 | 243 |
217 // === for tagging === | 244 // === for tagging === |
218 public function StartTagging() { | 245 public function StartTagging() { |
219 /** | 246 /** |
220 * This is the main method for tagging text. It passes all the information to "views/Extractapp/TaggingText.php" view. | 247 * This is the main method for tagging text. It passes all the information to "views/Extractapp/TaggingText.php" view. |
221 * The information contains the text string, taglist array, wordlist array, topic, etc. | 248 * The information contains the text string, taglist array, wordlist array, topic, etc. |
227 $data = array(); // data array to be passed to view | 254 $data = array(); // data array to be passed to view |
228 | 255 |
229 //$taglistArray = $this->GetTaglistArray(); | 256 //$taglistArray = $this->GetTaglistArray(); |
230 //for GetTaglistByTopicId: | 257 //for GetTaglistByTopicId: |
231 $taglistArray = $this->GetTaglistByTopicId($this->GetTopic()); | 258 $taglistArray = $this->GetTaglistByTopicId($this->GetTopic()); |
232 | 259 |
233 // $this->taglist_infile is set (1) from file or (2) from _postdata['taglistArray'] which comes from frontend that user decided | 260 // $this->taglist_infile is set (1) from file or (2) from _postdata['taglistArray'] which comes from frontend that user decided |
234 // $this->taglist_infile is the most up-to-date taglist decided by user. Should be written into file. | 261 // $this->taglist_infile is the most up-to-date taglist decided by user. Should be written into file. |
235 if( $this->TaglistSubsetIn($this->taglist_infile, $taglistArray) ) { // TaglistSubsetIn($l1,$l2): $l1 is a subset of $l2 or not | 262 if( $this->TaglistSubsetIn($this->taglist_infile, $taglistArray) ) { // TaglistSubsetIn($l1,$l2): $l1 is a subset of $l2 or not |
236 $this->taglist_infile = ""; | 263 $this->taglist_infile = ""; |
237 } | 264 } |
251 $data['taglistArray'] = $taglistArray; | 278 $data['taglistArray'] = $taglistArray; |
252 $data['wordlistArray'] = $wordlistArray; | 279 $data['wordlistArray'] = $wordlistArray; |
253 $data['section_id'] = $section_id; | 280 $data['section_id'] = $section_id; |
254 $data['topiclistArray'] = $topiclistArray; | 281 $data['topiclistArray'] = $topiclistArray; |
255 $data['default_topic_id'] = $this->GetTopic(); | 282 $data['default_topic_id'] = $this->GetTopic(); |
256 $data['topic_tag'] = $this->GetTopicTag($this->GetTopic()); | 283 $topic_id = $this->GetTopic(); |
257 $data['topic_name'] = $this->GetTopicName($this->GetTopic()); | 284 |
285 $data['topic_tag'] = $this->GetTopicTag($topic_id); | |
286 $data['topic_name'] = $this->GetTopicName($topic_id); | |
287 $data['topic_tag_ch'] = $this->GetTopicTagName($topic_id); | |
258 | 288 |
259 | 289 |
260 $data['info'] = array('file_id'=>$this->file_id, 'user_id'=>$this->user_id, | 290 $data['info'] = array('file_id'=>$this->file_id, 'user_id'=>$this->user_id, |
261 'branch_id'=>$this->branch_id, 'section_id'=>$this->section_id, 'book_id'=>$this->book_id, | 291 'branch_id'=>$this->branch_id, 'section_id'=>$this->section_id, 'book_id'=>$this->book_id, |
262 'book_name'=>$this->book_name, 'section_name'=>$this->section_name, | 292 'book_name'=>$this->book_name,'section_name'=>$this->section_name, 'period'=>$this->book_meta->period, |
263 'current_fileId'=>$this->current_fileId); | 293 'current_fileId'=>$this->current_fileId); |
264 | 294 |
265 $this->messages['debug'] .= "[Debug] "; | 295 $this->messages['debug'] .= "[Debug] "; |
266 $this->messages['debug'] .= "file_id=".$this->file_id.", section_id=".$this->section_id; | 296 $this->messages['debug'] .= "file_id=".$this->file_id.", section_id=".$this->section_id; |
267 $this->messages['debug'] .= ", user_id=".$this->user_id.", branch_id=".$this->branch_id.", topic_id=".$this->topic; | 297 $this->messages['debug'] .= ", user_id=".$this->user_id.", branch_id=".$this->branch_id.", topic_id=".$this->topic; |
270 | 300 |
271 $this->messages['info'] .= "[Info] book name: ".$this->book_name; | 301 $this->messages['info'] .= "[Info] book name: ".$this->book_name; |
272 $this->messages['info'] .= ", section id: ". $this->section_id; | 302 $this->messages['info'] .= ", section id: ". $this->section_id; |
273 $this->messages['info'] .= ", branch id: ".$this->branch_id; | 303 $this->messages['info'] .= ", branch id: ".$this->branch_id; |
274 $this->messages['info'] .= ", file id: ".$this->file_id; | 304 $this->messages['info'] .= ", file id: ".$this->file_id; |
305 $this->messages['info'] .= ", period: ".$this->book_meta->period; | |
275 $this->messages['info'] .= "<br>"; | 306 $this->messages['info'] .= "<br>"; |
276 | 307 |
277 | 308 |
278 $data['messages'] = $this->messages; | 309 $data['messages'] = $this->messages; |
279 | 310 |
306 | 337 |
307 } | 338 } |
308 | 339 |
309 public function UpdateInfoResponsedFromLGService($response) { | 340 public function UpdateInfoResponsedFromLGService($response) { |
310 /** | 341 /** |
311 * | 342 * |
312 */ | 343 */ |
313 | |
314 | 344 |
315 if (isset($response["file"])) { | 345 if (isset($response["file"])) { |
316 $response_file = $response["file"]; | 346 $response_file = $response["file"]; |
317 } | 347 } |
318 if (isset($response["branch"])) { | 348 if (isset($response["branch"])) { |
351 $text .= "\n<text>\n"; | 381 $text .= "\n<text>\n"; |
352 // --- topic --- | 382 // --- topic --- |
353 $text .= "<topic>".$this->topic."</topic>\n"; | 383 $text .= "<topic>".$this->topic."</topic>\n"; |
354 // --- book meta data --- | 384 // --- book meta data --- |
355 $book = $this->book_meta; | 385 $book = $this->book_meta; |
386 | |
387 $text .= "<book>\n"; | |
388 $text .= "<id>".$book->id."</id>\n"; | |
389 $text .= "<name>".$book->name."</name>\n"; | |
390 $text .= "<level1>".$book->level1."</level1>\n"; | |
391 $text .= "<level2>".$book->level2."</level2>\n"; | |
392 $text .= "<period>".$book->period."</period>\n"; | |
393 $text .= "<dynasty>".$book->dynasty."</dynasty>\n"; | |
394 $text .= "<start_year>".$book->start_year."</start_year>\n"; | |
395 $text .= "<end_year>".$book->end_year."</end_year>\n"; | |
396 $text .= "<line>".$book->line."</line>\n"; | |
397 $text .= "<volume>".$book->volume."</volume>\n"; | |
398 $text .= "<author>".$book->author."</author>\n"; | |
399 $text .= "<edition>".$book->edition."</edition>\n"; | |
400 $text .= "<in_jibengujiku>".$book->in_jibengujiku."</in_jibengujiku>\n"; | |
401 $text .= "<admin_type>".$book->admin_type."</admin_type>\n"; | |
402 $text .= "</book>\n"; | |
403 | |
404 | |
405 /* | |
356 foreach ($book as $b) { | 406 foreach ($book as $b) { |
357 $text .= "<book>\n"; | 407 $text .= "<book>\n"; |
358 $text .= "<id>".$b[0]."</id>\n"; | 408 $text .= "<id>".$b[0]."</id>\n"; |
359 $text .= "<name>".$b[1]."</name>\n"; | 409 $text .= "<name>".$b[1]."</name>\n"; |
360 $text .= "<level1>".$b[2]."</level1>\n"; | 410 $text .= "<level1>".$b[2]."</level1>\n"; |
370 $text .= "<in_jibengujiku>".$b[12]."</in_jibengujiku>\n"; | 420 $text .= "<in_jibengujiku>".$b[12]."</in_jibengujiku>\n"; |
371 $text .= "<admin_type>".$b[13]."</admin_type>\n"; | 421 $text .= "<admin_type>".$b[13]."</admin_type>\n"; |
372 $text .= "</book>\n"; | 422 $text .= "</book>\n"; |
373 | 423 |
374 } | 424 } |
425 */ | |
426 | |
375 // --- section info --- | 427 // --- section info --- |
376 $text .= "<section>\n"; | 428 $text .= "<section>\n"; |
377 $text .= "<id>".$this->section_id."</id>\n"; | 429 $text .= "<id>".$this->section_id."</id>\n"; |
378 $text .= "<name>".$this->section_name."</name>\n"; | 430 $text .= "<name>".$this->section_name."</name>\n"; |
379 $text .= "</section>\n"; | 431 $text .= "</section>\n"; |
1084 private function GetTopicTag($topic_id) { | 1136 private function GetTopicTag($topic_id) { |
1085 $result = $this->GetTopicById($topic_id); | 1137 $result = $this->GetTopicById($topic_id); |
1086 $row = mysql_fetch_assoc($result); | 1138 $row = mysql_fetch_assoc($result); |
1087 $tag = $row['tag']; | 1139 $tag = $row['tag']; |
1088 return $tag; | 1140 return $tag; |
1089 | |
1090 } | 1141 } |
1091 private function GetTopicName($topic_id) { | 1142 private function GetTopicName($topic_id) { |
1092 $result = $this->GetTopicById($topic_id); | 1143 $result = $this->GetTopicById($topic_id); |
1093 $row = mysql_fetch_assoc($result); | 1144 $row = mysql_fetch_assoc($result); |
1094 //$name = $row['name']; | 1145 //$name = $row['name']; |
1095 $name = array('name_en'=>$row['name_en'], 'name_ch'=>$row['name_ch'], 'name_pinyin'=>$row['name_pinyin']); | 1146 $name = array('name_en'=>$row['name_en'], 'name_ch'=>$row['name_ch'], 'name_pinyin'=>$row['name_pinyin']); |
1096 return $name; | 1147 return $name; |
1097 | 1148 } |
1098 } | 1149 private function GetTopicTagName($topic_id) { |
1150 $topic_tag = $this->GetTopicTag($topic_id); | |
1151 $query = "SELECT * FROM `taglist` WHERE tag='".$topic_tag."'"; | |
1152 $result = mysql_query($query); | |
1153 $row = mysql_fetch_assoc($result); | |
1154 | |
1155 return $row['name']; | |
1156 } | |
1157 | |
1158 | |
1099 | 1159 |
1100 | 1160 |
1101 // =========================== | 1161 // =========================== |
1102 | 1162 |
1103 // === for manage wordlist === | 1163 // === for manage wordlist === |
1288 $section_info = $this->GetSectionInfo(); | 1348 $section_info = $this->GetSectionInfo(); |
1289 | 1349 |
1290 $bookId = $section_info['bookId']; | 1350 $bookId = $section_info['bookId']; |
1291 $startPage = $section_info['startPage']; | 1351 $startPage = $section_info['startPage']; |
1292 $endPage = $section_info['endPage']; | 1352 $endPage = $section_info['endPage']; |
1353 $this->section_name = $section_info['sectionName']; | |
1354 | |
1293 | 1355 |
1294 $contentString=""; | 1356 $contentString=""; |
1295 $data_path = $this->GetDataPath(); | 1357 $data_path = $this->GetDataPath(); |
1296 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { | 1358 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { |
1297 $filename = $data_path."parsing_files/".$section_id.".txt"; | 1359 $filename = $data_path."parsing_files/".$section_id.".txt"; |