comparison models/extractapp.php @ 86:0141df465205 extractapp_dev

New: add chinese characters to pinyin
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Thu, 30 Apr 2015 10:52:05 +0200
parents ecc4b22e9b05
children fb5049fc5dd7
comparison
equal deleted inserted replaced
85:966a36752c34 86:0141df465205
74 // get book_meta from the URL in $this->get_section_metadata_by_sectionId_url 74 // get book_meta from the URL in $this->get_section_metadata_by_sectionId_url
75 $section_meta_url = $this->get_section_metadata_by_sectionId_url.$this->section_id; 75 $section_meta_url = $this->get_section_metadata_by_sectionId_url.$this->section_id;
76 $section_meta = json_decode(file_get_contents($section_meta_url), true); 76 $section_meta = json_decode(file_get_contents($section_meta_url), true);
77 77
78 $b = $section_meta['section']['book']; 78 $b = $section_meta['section']['book'];
79 79
80 $this->book_name = $b['name'];
81 $this->book_id = $b['id'];
82 /*
80 $book_meta = array(); 83 $book_meta = array();
81 array_push($book_meta, array($b['id'],$b['name'],$b['level1'],$b['level2'], 84 array_push($book_meta, array($b['id'],$b['name'],$b['level1'],$b['level2'],
82 $b['period'],$b['dynasty'], $b['start_year'], $b['end_year'],$b['line'], 85 $b['period'],$b['dynasty'], $b['start_year'], $b['end_year'],$b['line'],
83 $b['volume'],$b['author'], $b['edition'],$b['in_jibengujiku'], $b['admin_type'])); // missing author,year,pagenumber 86 $b['volume'],$b['author'], $b['edition'],$b['in_jibengujiku'], $b['admin_type'])); // missing author,year,pagenumber
87 */
88 /*
89 $book_meta = array('id'=>$b['id'],'name'=>$b['name'],'level1'=>$b['level1'],'level2'=>$b['level2'],
90 'period'=>$b['period'],'dynasty'=>$b['dynasty'],'start_year'=>$b['start_year'], 'end_year'=>$b['end_year'],'line'=>$b['line'],
91 'volume'=>$b['volume'],'author'=>$b['author'],'edition'=>$b['edition'],
92 'in_jibengujiku'=>$b['in_jibengujiku'],'admin_type'=>$b['admin_type']); // missing author,year,pagenumber
93 */
94
95 $book_meta->id = $b['id'];
96 $book_meta->name = $b['name'];
97 $book_meta->level1 = $b['level1'];
98 $book_meta->level2 = $b['level2'];
99 $book_meta->period = $b['period'];
100 $book_meta->dynasty = $b['dynasty'];
101 $book_meta->start_year = $b['start_year'];
102 $book_meta->end_year = $b['end_year'];
103 $book_meta->line = $b['line'];
104 $book_meta->volume = $b['volume'];
105 $book_meta->author = $b['author'];
106 $book_meta->edition = $b['edition'];
107 $book_meta->in_jibengujiku = $b['in_jibengujiku'];
108 $book_meta->admin_type = $b['admin_type'];
109
110
84 $this->book_meta = $book_meta; 111 $this->book_meta = $book_meta;
85 112
86 } 113 }
87 114
88 public function SetTextFromSectionId() { // remove $_postdata as input 115 public function SetTextFromSectionId() { // remove $_postdata as input
211 } 238 }
212 } 239 }
213 return true; 240 return true;
214 241
215 } 242 }
216 243
217 // === for tagging === 244 // === for tagging ===
218 public function StartTagging() { 245 public function StartTagging() {
219 /** 246 /**
220 * This is the main method for tagging text. It passes all the information to "views/Extractapp/TaggingText.php" view. 247 * This is the main method for tagging text. It passes all the information to "views/Extractapp/TaggingText.php" view.
221 * The information contains the text string, taglist array, wordlist array, topic, etc. 248 * The information contains the text string, taglist array, wordlist array, topic, etc.
227 $data = array(); // data array to be passed to view 254 $data = array(); // data array to be passed to view
228 255
229 //$taglistArray = $this->GetTaglistArray(); 256 //$taglistArray = $this->GetTaglistArray();
230 //for GetTaglistByTopicId: 257 //for GetTaglistByTopicId:
231 $taglistArray = $this->GetTaglistByTopicId($this->GetTopic()); 258 $taglistArray = $this->GetTaglistByTopicId($this->GetTopic());
232 259
233 // $this->taglist_infile is set (1) from file or (2) from _postdata['taglistArray'] which comes from frontend that user decided 260 // $this->taglist_infile is set (1) from file or (2) from _postdata['taglistArray'] which comes from frontend that user decided
234 // $this->taglist_infile is the most up-to-date taglist decided by user. Should be written into file. 261 // $this->taglist_infile is the most up-to-date taglist decided by user. Should be written into file.
235 if( $this->TaglistSubsetIn($this->taglist_infile, $taglistArray) ) { // TaglistSubsetIn($l1,$l2): $l1 is a subset of $l2 or not 262 if( $this->TaglistSubsetIn($this->taglist_infile, $taglistArray) ) { // TaglistSubsetIn($l1,$l2): $l1 is a subset of $l2 or not
236 $this->taglist_infile = ""; 263 $this->taglist_infile = "";
237 } 264 }
251 $data['taglistArray'] = $taglistArray; 278 $data['taglistArray'] = $taglistArray;
252 $data['wordlistArray'] = $wordlistArray; 279 $data['wordlistArray'] = $wordlistArray;
253 $data['section_id'] = $section_id; 280 $data['section_id'] = $section_id;
254 $data['topiclistArray'] = $topiclistArray; 281 $data['topiclistArray'] = $topiclistArray;
255 $data['default_topic_id'] = $this->GetTopic(); 282 $data['default_topic_id'] = $this->GetTopic();
256 $data['topic_tag'] = $this->GetTopicTag($this->GetTopic()); 283 $topic_id = $this->GetTopic();
257 $data['topic_name'] = $this->GetTopicName($this->GetTopic()); 284
285 $data['topic_tag'] = $this->GetTopicTag($topic_id);
286 $data['topic_name'] = $this->GetTopicName($topic_id);
287 $data['topic_tag_ch'] = $this->GetTopicTagName($topic_id);
258 288
259 289
260 $data['info'] = array('file_id'=>$this->file_id, 'user_id'=>$this->user_id, 290 $data['info'] = array('file_id'=>$this->file_id, 'user_id'=>$this->user_id,
261 'branch_id'=>$this->branch_id, 'section_id'=>$this->section_id, 'book_id'=>$this->book_id, 291 'branch_id'=>$this->branch_id, 'section_id'=>$this->section_id, 'book_id'=>$this->book_id,
262 'book_name'=>$this->book_name, 'section_name'=>$this->section_name, 292 'book_name'=>$this->book_name,'section_name'=>$this->section_name, 'period'=>$this->book_meta->period,
263 'current_fileId'=>$this->current_fileId); 293 'current_fileId'=>$this->current_fileId);
264 294
265 $this->messages['debug'] .= "[Debug] "; 295 $this->messages['debug'] .= "[Debug] ";
266 $this->messages['debug'] .= "file_id=".$this->file_id.", section_id=".$this->section_id; 296 $this->messages['debug'] .= "file_id=".$this->file_id.", section_id=".$this->section_id;
267 $this->messages['debug'] .= ", user_id=".$this->user_id.", branch_id=".$this->branch_id.", topic_id=".$this->topic; 297 $this->messages['debug'] .= ", user_id=".$this->user_id.", branch_id=".$this->branch_id.", topic_id=".$this->topic;
270 300
271 $this->messages['info'] .= "[Info] book name: ".$this->book_name; 301 $this->messages['info'] .= "[Info] book name: ".$this->book_name;
272 $this->messages['info'] .= ", section id: ". $this->section_id; 302 $this->messages['info'] .= ", section id: ". $this->section_id;
273 $this->messages['info'] .= ", branch id: ".$this->branch_id; 303 $this->messages['info'] .= ", branch id: ".$this->branch_id;
274 $this->messages['info'] .= ", file id: ".$this->file_id; 304 $this->messages['info'] .= ", file id: ".$this->file_id;
305 $this->messages['info'] .= ", period: ".$this->book_meta->period;
275 $this->messages['info'] .= "<br>"; 306 $this->messages['info'] .= "<br>";
276 307
277 308
278 $data['messages'] = $this->messages; 309 $data['messages'] = $this->messages;
279 310
306 337
307 } 338 }
308 339
309 public function UpdateInfoResponsedFromLGService($response) { 340 public function UpdateInfoResponsedFromLGService($response) {
310 /** 341 /**
311 * 342 *
312 */ 343 */
313
314 344
315 if (isset($response["file"])) { 345 if (isset($response["file"])) {
316 $response_file = $response["file"]; 346 $response_file = $response["file"];
317 } 347 }
318 if (isset($response["branch"])) { 348 if (isset($response["branch"])) {
351 $text .= "\n<text>\n"; 381 $text .= "\n<text>\n";
352 // --- topic --- 382 // --- topic ---
353 $text .= "<topic>".$this->topic."</topic>\n"; 383 $text .= "<topic>".$this->topic."</topic>\n";
354 // --- book meta data --- 384 // --- book meta data ---
355 $book = $this->book_meta; 385 $book = $this->book_meta;
386
387 $text .= "<book>\n";
388 $text .= "<id>".$book->id."</id>\n";
389 $text .= "<name>".$book->name."</name>\n";
390 $text .= "<level1>".$book->level1."</level1>\n";
391 $text .= "<level2>".$book->level2."</level2>\n";
392 $text .= "<period>".$book->period."</period>\n";
393 $text .= "<dynasty>".$book->dynasty."</dynasty>\n";
394 $text .= "<start_year>".$book->start_year."</start_year>\n";
395 $text .= "<end_year>".$book->end_year."</end_year>\n";
396 $text .= "<line>".$book->line."</line>\n";
397 $text .= "<volume>".$book->volume."</volume>\n";
398 $text .= "<author>".$book->author."</author>\n";
399 $text .= "<edition>".$book->edition."</edition>\n";
400 $text .= "<in_jibengujiku>".$book->in_jibengujiku."</in_jibengujiku>\n";
401 $text .= "<admin_type>".$book->admin_type."</admin_type>\n";
402 $text .= "</book>\n";
403
404
405 /*
356 foreach ($book as $b) { 406 foreach ($book as $b) {
357 $text .= "<book>\n"; 407 $text .= "<book>\n";
358 $text .= "<id>".$b[0]."</id>\n"; 408 $text .= "<id>".$b[0]."</id>\n";
359 $text .= "<name>".$b[1]."</name>\n"; 409 $text .= "<name>".$b[1]."</name>\n";
360 $text .= "<level1>".$b[2]."</level1>\n"; 410 $text .= "<level1>".$b[2]."</level1>\n";
370 $text .= "<in_jibengujiku>".$b[12]."</in_jibengujiku>\n"; 420 $text .= "<in_jibengujiku>".$b[12]."</in_jibengujiku>\n";
371 $text .= "<admin_type>".$b[13]."</admin_type>\n"; 421 $text .= "<admin_type>".$b[13]."</admin_type>\n";
372 $text .= "</book>\n"; 422 $text .= "</book>\n";
373 423
374 } 424 }
425 */
426
375 // --- section info --- 427 // --- section info ---
376 $text .= "<section>\n"; 428 $text .= "<section>\n";
377 $text .= "<id>".$this->section_id."</id>\n"; 429 $text .= "<id>".$this->section_id."</id>\n";
378 $text .= "<name>".$this->section_name."</name>\n"; 430 $text .= "<name>".$this->section_name."</name>\n";
379 $text .= "</section>\n"; 431 $text .= "</section>\n";
1084 private function GetTopicTag($topic_id) { 1136 private function GetTopicTag($topic_id) {
1085 $result = $this->GetTopicById($topic_id); 1137 $result = $this->GetTopicById($topic_id);
1086 $row = mysql_fetch_assoc($result); 1138 $row = mysql_fetch_assoc($result);
1087 $tag = $row['tag']; 1139 $tag = $row['tag'];
1088 return $tag; 1140 return $tag;
1089
1090 } 1141 }
1091 private function GetTopicName($topic_id) { 1142 private function GetTopicName($topic_id) {
1092 $result = $this->GetTopicById($topic_id); 1143 $result = $this->GetTopicById($topic_id);
1093 $row = mysql_fetch_assoc($result); 1144 $row = mysql_fetch_assoc($result);
1094 //$name = $row['name']; 1145 //$name = $row['name'];
1095 $name = array('name_en'=>$row['name_en'], 'name_ch'=>$row['name_ch'], 'name_pinyin'=>$row['name_pinyin']); 1146 $name = array('name_en'=>$row['name_en'], 'name_ch'=>$row['name_ch'], 'name_pinyin'=>$row['name_pinyin']);
1096 return $name; 1147 return $name;
1097 1148 }
1098 } 1149 private function GetTopicTagName($topic_id) {
1150 $topic_tag = $this->GetTopicTag($topic_id);
1151 $query = "SELECT * FROM `taglist` WHERE tag='".$topic_tag."'";
1152 $result = mysql_query($query);
1153 $row = mysql_fetch_assoc($result);
1154
1155 return $row['name'];
1156 }
1157
1158
1099 1159
1100 1160
1101 // =========================== 1161 // ===========================
1102 1162
1103 // === for manage wordlist === 1163 // === for manage wordlist ===
1288 $section_info = $this->GetSectionInfo(); 1348 $section_info = $this->GetSectionInfo();
1289 1349
1290 $bookId = $section_info['bookId']; 1350 $bookId = $section_info['bookId'];
1291 $startPage = $section_info['startPage']; 1351 $startPage = $section_info['startPage'];
1292 $endPage = $section_info['endPage']; 1352 $endPage = $section_info['endPage'];
1353 $this->section_name = $section_info['sectionName'];
1354
1293 1355
1294 $contentString=""; 1356 $contentString="";
1295 $data_path = $this->GetDataPath(); 1357 $data_path = $this->GetDataPath();
1296 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { 1358 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) {
1297 $filename = $data_path."parsing_files/".$section_id.".txt"; 1359 $filename = $data_path."parsing_files/".$section_id.".txt";