Mercurial > hg > extraction-interface
comparison models/extractapp.php @ 78:960ba96efce1 extractapp
Update: click to popup remove-tag-window; select to popup tag-window
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 20 Apr 2015 15:44:54 +0200 |
parents | 97c1e5102a22 |
children | e6e213b26454 |
comparison
equal
deleted
inserted
replaced
77:97c1e5102a22 | 78:960ba96efce1 |
---|---|
41 | 41 |
42 $this->lg_text = $lg_text; | 42 $this->lg_text = $lg_text; |
43 | 43 |
44 } | 44 } |
45 | 45 |
46 public function SetBookMetaDataBySectionId() { | 46 public function SetBookMetadataBySectionId() { |
47 | 47 |
48 // get book_meta from $this->get_section_metadata_by_sectionId_url | 48 // get book_meta from $this->get_section_metadata_by_sectionId_url |
49 $section_meta_url = $this->get_section_metadata_by_sectionId_url.$this->section_id; | 49 $section_meta_url = $this->get_section_metadata_by_sectionId_url.$this->section_id; |
50 $section_meta = json_decode(file_get_contents($section_meta_url), true); | 50 $section_meta = json_decode(file_get_contents($section_meta_url), true); |
51 | 51 |
180 $stringInput = $this->lg_text; | 180 $stringInput = $this->lg_text; |
181 | 181 |
182 $data = array(); // data array to be passed to view | 182 $data = array(); // data array to be passed to view |
183 | 183 |
184 //$taglistArray = $this->GetTaglistArray(); | 184 //$taglistArray = $this->GetTaglistArray(); |
185 //for GetTaglistByTopicID: | 185 //for GetTaglistByTopicId: |
186 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); | 186 $taglistArray = $this->GetTaglistByTopicId($this->GetTopic()); |
187 | 187 |
188 // $this->taglist_infile is set (1) from file or (2) from _postdata['taglistArray'] which comes from frontend that user decided | 188 // $this->taglist_infile is set (1) from file or (2) from _postdata['taglistArray'] which comes from frontend that user decided |
189 // $this->taglist_infile is the most up-to-date taglist decided by user. Should be written into file. | 189 // $this->taglist_infile is the most up-to-date taglist decided by user. Should be written into file. |
190 if( $this->TaglistSubsetIn($this->taglist_infile, $taglistArray) ) { // TaglistSubsetIn($l1,$l2): $l1 is a subset of $l2 or not | 190 if( $this->TaglistSubsetIn($this->taglist_infile, $taglistArray) ) { // TaglistSubsetIn($l1,$l2): $l1 is a subset of $l2 or not |
191 $this->taglist_infile = ""; | 191 $this->taglist_infile = ""; |
258 file_put_contents("data/parsing_files/".$postdata['filename'].".txt", $require); | 258 file_put_contents("data/parsing_files/".$postdata['filename'].".txt", $require); |
259 } | 259 } |
260 | 260 |
261 } | 261 } |
262 | 262 |
263 public function UpdateInfoByResponseFromLGService($response) { | 263 public function UpdateInfoResponsedFromLGService($response) { |
264 | 264 |
265 if (isset($response["file"])) { | 265 if (isset($response["file"])) { |
266 $response_file = $response["file"]; | 266 $response_file = $response["file"]; |
267 } | 267 } |
268 if (isset($response["branch"])) { | 268 if (isset($response["branch"])) { |
294 $this->current_fileId = 0; | 294 $this->current_fileId = 0; |
295 } | 295 } |
296 } | 296 } |
297 | 297 |
298 // This is only called by SaveFullTextToLGService() | 298 // This is only called by SaveFullTextToLGService() |
299 private function AppendMetaData($text_content) { | 299 private function AppendMetadata($text_content) { |
300 $text = '<?xml version="1.0" encoding="UTF-8"?>'; | 300 $text = '<?xml version="1.0" encoding="UTF-8"?>'; |
301 $text .= "\n<text>\n"; | 301 $text .= "\n<text>\n"; |
302 // --- topic --- | 302 // --- topic --- |
303 $text .= "<topic>".$this->topic."</topic>\n"; | 303 $text .= "<topic>".$this->topic."</topic>\n"; |
304 // --- book meta data --- | 304 // --- book meta data --- |
329 $text .= "</section>\n"; | 329 $text .= "</section>\n"; |
330 | 330 |
331 // ---- taglist --- | 331 // ---- taglist --- |
332 // $taglist = $this->taglist_infile; | 332 // $taglist = $this->taglist_infile; |
333 // obtain the latest taglist from db | 333 // obtain the latest taglist from db |
334 $taglist = $this->GetTaglistByTopicID($this->GetTopic()); | 334 $taglist = $this->GetTaglistByTopicId($this->GetTopic()); |
335 | 335 |
336 foreach ($taglist as $tagitem) { | 336 foreach ($taglist as $tagitem) { |
337 $text .= "<tagitem>\n"; | 337 $text .= "<tagitem>\n"; |
338 $text .= "<id>".$tagitem[0]."</id>\n"; | 338 $text .= "<id>".$tagitem[0]."</id>\n"; |
339 $text .= "<name>".$tagitem[1]."</name>\n"; | 339 $text .= "<name>".$tagitem[1]."</name>\n"; |
347 $text .= "</text>"; | 347 $text .= "</text>"; |
348 // ---- | 348 // ---- |
349 | 349 |
350 return $text; | 350 return $text; |
351 } | 351 } |
352 | |
352 public function SaveFullTextToLGService($_postdata) { | 353 public function SaveFullTextToLGService($_postdata) { |
353 // save tagged text (full text) by Jorge's API to lg service | 354 // save tagged text (full text) by Jorge's API to lg service |
354 // -------- | 355 // -------- |
355 global $AT_LOCAL; | 356 global $AT_LOCAL; |
356 | 357 |
383 // -- new branch case | 384 // -- new branch case |
384 $require = "<text_content>".$require."</text_content>\n"; | 385 $require = "<text_content>".$require."</text_content>\n"; |
385 } | 386 } |
386 */ | 387 */ |
387 | 388 |
388 $require = $this->AppendMetaData($require); | 389 $require = $this->AppendMetadata($require); |
389 | 390 |
390 // saving on my local machine during the development phase | 391 // saving on my local machine during the development phase |
391 if ($AT_LOCAL) { | 392 if ($AT_LOCAL) { |
392 file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require); | 393 file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require); |
393 } | 394 } |
452 return $response; | 453 return $response; |
453 | 454 |
454 } | 455 } |
455 | 456 |
456 // --- for regex ---- | 457 // --- for regex ---- |
457 public function GetRegexFilenameById($topic_id) { | 458 private function GetRegexFilenameById($topic_id) { |
458 $query = sprintf("SELECT regex_filename FROM `topic_regex_relation` WHERE topic_id=\"%s\"", $topic_id); | 459 $query = sprintf("SELECT regex_filename FROM `topic_regex_relation` WHERE topic_id=\"%s\"", $topic_id); |
459 $result = mysql_query($query); | 460 $result = mysql_query($query); |
460 if (!$result) { | 461 if (!$result) { |
461 return json_encode("Failed during selecting topic_regex_relation table."); | 462 return json_encode("Failed during selecting topic_regex_relation table."); |
462 } | 463 } |
466 array_push($filenames, $row['regex_filename']); | 467 array_push($filenames, $row['regex_filename']); |
467 } | 468 } |
468 | 469 |
469 return $filenames; | 470 return $filenames; |
470 } | 471 } |
471 public function SmartRegexLoad($topic_id) { | 472 public function LoadSmartRegex($topic_id) { |
472 | 473 |
473 // Load regex file based on current topic. Only shows the regex in this topic -- | 474 // Load regex file based on current topic. Only shows the regex in this topic -- |
474 $filenames = $this->GetRegexFilenameById($topic_id); | 475 $filenames = $this->GetRegexFilenameById($topic_id); |
475 | 476 |
476 // Get regex file from filesystem ---- | 477 // Get regex file from filesystem ---- |
490 echo json_encode($returnArray); | 491 echo json_encode($returnArray); |
491 return; | 492 return; |
492 } | 493 } |
493 | 494 |
494 | 495 |
495 public function SmartRegexSave($_postdata) { | 496 public function SaveSmartRegex($_postdata) { |
496 if ($_postdata['text']){ | 497 if ($_postdata['text']){ |
497 | 498 |
498 // --- update topic_regex_relation table --- | 499 // --- update topic_regex_relation table --- |
499 $topic_id = $_postdata['topic_id']; | 500 $topic_id = $_postdata['topic_id']; |
500 $filename = $_postdata['filename'].'.txt'; | 501 $filename = $_postdata['filename'].'.txt'; |
557 } | 558 } |
558 $outputTableArray[0]["other"] = "其他"; | 559 $outputTableArray[0]["other"] = "其他"; |
559 $outputTableArray[0]["page"] = "頁數"; | 560 $outputTableArray[0]["page"] = "頁數"; |
560 $outputTableArray[0]["full"] = "全文"; | 561 $outputTableArray[0]["full"] = "全文"; |
561 | 562 |
563 // id, name, tag, color in _taglistArray | |
562 foreach ( $_taglistArray as $tagValue ) { | 564 foreach ( $_taglistArray as $tagValue ) { |
563 $content = preg_replace("/<\/".$tagValue[2].">○*<".$tagValue[2].">/u", "", $_content); | 565 $tag_name = $tagValue[2]; // $tagValue[2] is tag_name |
564 $content = preg_replace("/<".$tagValue[2].">[ ]*<\/".$tagValue[2].">/u", "", $_content); | 566 $content = preg_replace("/<\/".$tag_name.">○*<".$tag_name.">/u", "", $_content); |
567 $content = preg_replace("/<".$tag_name.">[ ]*<\/".$tag_name.">/u", "", $_content); | |
565 } | 568 } |
566 | 569 |
567 $contentLineArray = explode( "<br>", $content ); | 570 $contentLineArray = explode( "<br>", $content ); |
568 | 571 |
569 $count=0; | 572 $count=0; |
571 foreach ( $contentLineArray as $value ) { | 574 foreach ( $contentLineArray as $value ) { |
572 $count++; | 575 $count++; |
573 $recordString = $value; | 576 $recordString = $value; |
574 $otherString = $recordString; | 577 $otherString = $recordString; |
575 //echo $recordString."<br>\n"; | 578 //echo $recordString."<br>\n"; |
579 // find hyper link in pattern with <a>...</a> | |
576 if ( preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $recordString, $matches) ) { | 580 if ( preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $recordString, $matches) ) { |
577 $pageNow = $matches[2]; | 581 $pageNow = $matches[2]; |
578 } | 582 } |
579 foreach ( $_taglistArray as $tagValue ) { | 583 foreach ( $_taglistArray as $tagValue ) { |
580 if ( preg_match_all("/<".$tagValue[2].">(.*?)<\/".$tagValue[2].">/u", $recordString, $matches, PREG_SET_ORDER) ) { | 584 $tag_name = $tagValue[2]; // $tagValue[2] is tag_name |
585 | |
586 if ( preg_match_all("/<".$tag_name.">(.*?)<\/".$tag_name.">/u", $recordString, $matches, PREG_SET_ORDER) ) { | |
581 foreach ( $matches as $matchesValue ) { | 587 foreach ( $matches as $matchesValue ) { |
582 $matchesValue[1] = preg_replace("/○/u", "", $matchesValue[1]); | 588 $matchesValue[1] = preg_replace("/○/u", "", $matchesValue[1]); |
589 | |
583 if ( preg_match_all("/〈(.*?)〉/u", $matchesValue[1], $matches2, PREG_SET_ORDER) ) { | 590 if ( preg_match_all("/〈(.*?)〉/u", $matchesValue[1], $matches2, PREG_SET_ORDER) ) { |
584 foreach ( $matches2 as $matches2Value ) { | 591 foreach ( $matches2 as $matches2Value ) { |
585 if ( isset($outputTableArray[$count][0][$tagValue[2]]) ) { | 592 if ( isset($outputTableArray[$count][0][$tag_name]) ) { |
586 $outputTableArray[$count][0][$tagValue[2]] .= ";".$matches2Value[1]; | 593 $outputTableArray[$count][0][$tag_name] .= ";".$matches2Value[1]; |
587 } else { | 594 } else { |
588 $outputTableArray[$count][0][$tagValue[2]] = $matches2Value[1]; | 595 $outputTableArray[$count][0][$tag_name] = $matches2Value[1]; |
589 } | 596 } |
590 } | 597 } |
591 } else { | 598 } else { |
592 if ( isset($outputTableArray[$count][0][$tagValue[2]]) ) { | 599 if ( isset($outputTableArray[$count][0][$tag_name]) ) { |
593 $outputTableArray[$count][0][$tagValue[2]] .= ";".$matchesValue[1]; | 600 $outputTableArray[$count][0][$tag_name] .= ";".$matchesValue[1]; |
594 } else { | 601 } else { |
595 $outputTableArray[$count][0][$tagValue[2]] = $matchesValue[1]; | 602 $outputTableArray[$count][0][$tag_name] = $matchesValue[1]; |
596 } | 603 } |
597 } | 604 } |
598 } | 605 } |
599 $otherString = preg_replace("/<".$tagValue[2].">(.*?)<\/".$tagValue[2].">/u", " ", $otherString); | 606 $otherString = preg_replace("/<".$tag_name.">(.*?)<\/".$tag_name.">/u", " ", $otherString); |
600 } | 607 } |
601 } | 608 } |
602 $otherString = preg_replace("/○/u", "", $otherString); | 609 $otherString = preg_replace("/○/u", "", $otherString); |
603 $outputTableArray[$count]["other"] = $otherString; | 610 $outputTableArray[$count]["other"] = $otherString; |
604 $outputTableArray[$count]["page"] = $pageNow; | 611 $outputTableArray[$count]["page"] = $pageNow; |
643 $sectionName = $postdata['sectionName']; | 650 $sectionName = $postdata['sectionName']; |
644 $bookId = $postdata['bookId']; | 651 $bookId = $postdata['bookId']; |
645 $bookName = $postdata['bookName']; | 652 $bookName = $postdata['bookName']; |
646 | 653 |
647 //$taglistArray = $this->GetTaglistArray(); | 654 //$taglistArray = $this->GetTaglistArray(); |
648 $taglistArray = $this->GetTaglistByTopicID($topic); | 655 $taglistArray = $this->GetTaglistByTopicId($topic); |
649 } | 656 } |
650 // ===== | 657 // ===== |
651 | 658 |
652 $topic_tag = $this->GetTopicTag($topic); | 659 $topic_tag = $this->GetTopicTag($topic); |
653 | 660 |
681 $row = mysql_fetch_assoc($result); | 688 $row = mysql_fetch_assoc($result); |
682 $largest_id = $row['AUTO_INCREMENT']-1; | 689 $largest_id = $row['AUTO_INCREMENT']-1; |
683 | 690 |
684 | 691 |
685 $topic_id = $_postdata['topic_id']; | 692 $topic_id = $_postdata['topic_id']; |
686 $result = $this->GetTaglistByTopicID($topic_id); | 693 $result = $this->GetTaglistByTopicId($topic_id); |
687 | 694 |
688 $taglistArray = array(); | 695 $taglistArray = array(); |
689 | 696 |
690 foreach ($result as $row) { | 697 foreach ($result as $row) { |
691 $taglistArray[$row[0]] = array($row[1], $row[2], $row[3]); | 698 $taglistArray[$row[0]] = array($row[1], $row[2], $row[3]); |
782 /** | 789 /** |
783 * | 790 * |
784 */ | 791 */ |
785 | 792 |
786 $topic = $postdata['topic']; | 793 $topic = $postdata['topic']; |
787 $result = $this->GetTopicByID($topic); | 794 $result = $this->GetTopicById($topic); |
788 $row = mysql_fetch_assoc($result); | 795 $row = mysql_fetch_assoc($result); |
789 $topic_name_en = $row['name_en']; | 796 $topic_name_en = $row['name_en']; |
790 $topic_name_ch = $row['name_ch']; | 797 $topic_name_ch = $row['name_ch']; |
791 $topic_name_pinyin = $row['name_pinyin']; | 798 $topic_name_pinyin = $row['name_pinyin']; |
792 | 799 |
969 */ | 976 */ |
970 | 977 |
971 } | 978 } |
972 | 979 |
973 | 980 |
974 private function GetTaglistByTopicID($topic_id) { | 981 private function GetTaglistByTopicId($topic_id) { |
975 $taglistArray = array(); | 982 $taglistArray = array(); |
976 // select taglist ids from topic_tag_relation table | 983 // select taglist ids from topic_tag_relation table |
977 $query = sprintf("SELECT * FROM `topic_tag_relation` WHERE `topic_id`='%s'", $topic_id); | 984 $query = sprintf("SELECT * FROM `topic_tag_relation` WHERE `topic_id`='%s'", $topic_id); |
978 $result = mysql_query($query); | 985 $result = mysql_query($query); |
979 if (!$result) { | 986 if (!$result) { |
1003 | 1010 |
1004 public function SetTopic($topic) { | 1011 public function SetTopic($topic) { |
1005 $this->topic = $topic; | 1012 $this->topic = $topic; |
1006 } | 1013 } |
1007 | 1014 |
1008 public function GetTopic() { | 1015 private function GetTopic() { |
1009 return $this->topic; | 1016 return $this->topic; |
1010 } | 1017 } |
1011 | 1018 |
1012 private function GetTopiclistArray() { | 1019 private function GetTopiclistArray() { |
1013 $topiclistArray = array(); | 1020 $topiclistArray = array(); |
1014 $result = $this->GetTopiclist(); | 1021 $result = $this->GetTopicList(); |
1015 while ($row = mysql_fetch_assoc($result)) { | 1022 while ($row = mysql_fetch_assoc($result)) { |
1016 //array_push($topiclistArray, array('id'=>$row['id'],'name'=>$row['name'],'tag'=>$row['tag'])); | 1023 //array_push($topiclistArray, array('id'=>$row['id'],'name'=>$row['name'],'tag'=>$row['tag'])); |
1017 array_push($topiclistArray, array('id'=>$row['id'],'tag'=>$row['tag'],'name_en'=>$row['name_en'],'name_ch'=>$row['name_ch'],'name_pinyin'=>$row['name_pinyin'],)); | 1024 array_push($topiclistArray, array('id'=>$row['id'],'tag'=>$row['tag'],'name_en'=>$row['name_en'],'name_ch'=>$row['name_ch'],'name_pinyin'=>$row['name_pinyin'],)); |
1018 } | 1025 } |
1019 return $topiclistArray; | 1026 return $topiclistArray; |
1020 } | 1027 } |
1021 | 1028 |
1022 | 1029 |
1023 private function GetTopicTag($topic_id) { | 1030 private function GetTopicTag($topic_id) { |
1024 $result = $this->GetTopicByID($topic_id); | 1031 $result = $this->GetTopicById($topic_id); |
1025 $row = mysql_fetch_assoc($result); | 1032 $row = mysql_fetch_assoc($result); |
1026 $tag = $row['tag']; | 1033 $tag = $row['tag']; |
1027 return $tag; | 1034 return $tag; |
1028 | 1035 |
1029 } | 1036 } |
1030 private function GetTopicName($topic_id) { | 1037 private function GetTopicName($topic_id) { |
1031 $result = $this->GetTopicByID($topic_id); | 1038 $result = $this->GetTopicById($topic_id); |
1032 $row = mysql_fetch_assoc($result); | 1039 $row = mysql_fetch_assoc($result); |
1033 //$name = $row['name']; | 1040 //$name = $row['name']; |
1034 $name = array('name_en'=>$row['name_en'], 'name_ch'=>$row['name_ch'], 'name_pinyin'=>$row['name_pinyin']); | 1041 $name = array('name_en'=>$row['name_en'], 'name_ch'=>$row['name_ch'], 'name_pinyin'=>$row['name_pinyin']); |
1035 return $name; | 1042 return $name; |
1036 | 1043 |
1119 */ | 1126 */ |
1120 } | 1127 } |
1121 | 1128 |
1122 | 1129 |
1123 // ======================================= | 1130 // ======================================= |
1124 | 1131 |
1125 public function sortFunction($a,$b) { | 1132 private function sortFunction($a,$b) { |
1126 return strlen($b)-strlen($a); | 1133 return strlen($b)-strlen($a); |
1127 } | 1134 } |
1128 | 1135 |
1129 private function GetSectionId() { | 1136 private function GetSectionId() { |
1130 | 1137 |
1139 $section_id = $this->GetSectionId(); | 1146 $section_id = $this->GetSectionId(); |
1140 if (!is_numeric($section_id)){ | 1147 if (!is_numeric($section_id)){ |
1141 return $section_id; | 1148 return $section_id; |
1142 } | 1149 } |
1143 | 1150 |
1144 $result = $this->GetSectionsByID($section_id); | 1151 $result = $this->GetSectionsById($section_id); |
1145 | 1152 |
1146 | 1153 |
1147 while ($row = mysql_fetch_assoc($result)) { | 1154 while ($row = mysql_fetch_assoc($result)) { |
1148 $bookId=$row['books_id']; | 1155 $bookId=$row['books_id']; |
1149 $startPage=$row['start_page']; | 1156 $startPage=$row['start_page']; |
1265 } | 1272 } |
1266 | 1273 |
1267 /* | 1274 /* |
1268 // get book_meta from books table in db on localhost | 1275 // get book_meta from books table in db on localhost |
1269 $book_meta = array(); | 1276 $book_meta = array(); |
1270 $books_result = $this->GetBooksByID($bookId); | 1277 $books_result = $this->GetBooksById($bookId); |
1271 while ($row = mysql_fetch_assoc($books_result)) { | 1278 while ($row = mysql_fetch_assoc($books_result)) { |
1272 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'],(string)$row['dynasty'])); | 1279 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'],(string)$row['dynasty'])); |
1273 // use 'start_year' as year, 'line' is pagenumber | 1280 // use 'start_year' as year, 'line' is pagenumber |
1274 } | 1281 } |
1275 $this->book_meta = $book_meta; | 1282 $this->book_meta = $book_meta; |
1321 | 1328 |
1322 return $wordlistArray; | 1329 return $wordlistArray; |
1323 | 1330 |
1324 } | 1331 } |
1325 | 1332 |
1326 protected function GetBooksInfo($bookId) { | 1333 private function GetBooksInfo($bookId) { |
1327 $result = $this->GetSectionsByID($bookId); | 1334 $result = $this->GetSectionsById($bookId); |
1328 while ($row = mysql_fetch_assoc($result)) { | 1335 while ($row = mysql_fetch_assoc($result)) { |
1329 $bookName = $row['name']; | 1336 $bookName = $row['name']; |
1330 } | 1337 } |
1331 | 1338 |
1332 $data = array(); | 1339 $data = array(); |