Mercurial > hg > extraction-interface
comparison develop/models/extractapp.php @ 40:2e938dc046db extractapp
load,save xml file with topic, etc.
| field | value |
|---|---|
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
| date | Tue, 10 Mar 2015 13:46:16 +0100 |
| parents | 8347776a44fc |
| children | 533a6c39c128 |
Comparison legend: equal, deleted, inserted, replaced
| 39:8347776a44fc | 40:2e938dc046db |
|---|---|
| 16 $this->SetSectionId($_urlvalues); | 16 $this->SetSectionId($_urlvalues); |
| 17 | 17 |
| 18 } | 18 } |
| 19 | 19 |
| 20 public function GetTextFromFileId($_postdata) { | 20 public function GetTextFromFileId($_postdata) { |
| 21 /* | |
| 22 if ($this->current_fileId != 0) { | |
| 23 $this->file_id = $this->current_fileId; | |
| 24 } else { | |
| 25 $this->file_id = $_postdata['fileId']; | |
| 26 } | |
| 27 */ | |
| 28 $this->file_id = $_postdata['fileId']; | 21 $this->file_id = $_postdata['fileId']; |
| 29 $branch_id = $_postdata['branchId']; | 22 $branch_id = $_postdata['branchId']; |
| 30 $section_id = $_postdata['sectionId']; | 23 $section_id = $_postdata['sectionId']; |
| 31 | 24 |
| 32 $this->branch_id = $branch_id; | 25 $this->branch_id = $branch_id; |
| 33 $this->user_id = $_postdata['userId']; | 26 $this->user_id = $_postdata['userId']; |
| 34 $this->section_id = $section_id; | 27 $this->section_id = $section_id; |
| 35 | 28 |
| 36 | |
| 37 $this->section_name = $_postdata['sectionName']; | 29 $this->section_name = $_postdata['sectionName']; |
| 38 $this->book_id = $_postdata['bookId']; | 30 $this->book_id = $_postdata['bookId']; |
| 39 $this->book_name = $_postdata['bookName']; | 31 $this->book_name = $_postdata['bookName']; |
| 40 | 32 |
| 41 | |
| 42 // get from URL with file_id | 33 // get from URL with file_id |
| 43 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; | 34 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; |
| 44 $lg_text = file_get_contents($lg_text_url); | 35 //$lg_text = file_get_contents($lg_text_url); |
| 36 // --- parsing meta data | |
| 37 $lg_text = $this->ParseMetaData($lg_text_url); | |
| 38 | |
| 39 // ---- | |
| 40 | |
| 45 | 41 |
| 46 $stringInput = $lg_text; | 42 $stringInput = $lg_text; |
| 47 $stringInput = preg_replace("/ /u", "○", $stringInput); | 43 $stringInput = preg_replace("/ /u", "○", $stringInput); |
| 48 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 44 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
| 49 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); | 45 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); |
| 52 $this->lg_text = $lg_text; | 48 $this->lg_text = $lg_text; |
| 53 | 49 |
| 54 } | 50 } |
| 55 | 51 |
| 56 public function GetTextFromSectionId($_postdata) { | 52 public function GetTextFromSectionId($_postdata) { |
| 57 | |
| 58 $section_id = $_postdata['sectionId']; | 53 $section_id = $_postdata['sectionId']; |
| 59 $this->section_id = $section_id; | 54 $this->section_id = $section_id; |
| 60 $this->user_id = $_postdata['userId']; | 55 $this->user_id = $_postdata['userId']; |
| 61 | 56 |
| 62 $this->section_name = $_postdata['sectionName']; | 57 $this->section_name = $_postdata['sectionName']; |
| 63 $this->book_id = $_postdata['bookId']; | 58 $this->book_id = $_postdata['bookId']; |
| 64 $this->book_name = $_postdata['bookName']; | 59 $this->book_name = $_postdata['bookName']; |
| 65 | 60 |
| 66 // get from URL with file_id | 61 // get from URL with file_id |
| 67 $lg_text_url = $this->get_text_from_sectionId_url.$section_id; | 62 $lg_text_url = $this->get_text_from_sectionId_url.$section_id; |
| 68 | |
| 69 $lg_text = file_get_contents($lg_text_url); | 63 $lg_text = file_get_contents($lg_text_url); |
| 70 | 64 |
| 71 $stringInput = $lg_text; | 65 $stringInput = $lg_text; |
| 72 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); | 66 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); |
| 73 $stringInput = preg_replace("/ /u", "○", $stringInput); | 67 $stringInput = preg_replace("/ /u", "○", $stringInput); |
| 116 $this->book_id = $_postdata['bookId']; | 110 $this->book_id = $_postdata['bookId']; |
| 117 } | 111 } |
| 118 if ($_postdata['currentFileId']) { | 112 if ($_postdata['currentFileId']) { |
| 119 $this->current_fileId = $_postdata['currentFileId']; | 113 $this->current_fileId = $_postdata['currentFileId']; |
| 120 } | 114 } |
| 121 | 115 if ($_postdata['taglistArray']) { |
| 122 | 116 $this->taglist_infile = json_decode($_postdata['taglistArray']); |
| 117 } | |
| 118 if ($_postdata['book_meta']) { | |
| 119 $this->book_meta = json_decode($_postdata['book_meta']); | |
| 120 } | |
| 123 | 121 |
| 124 } | 122 } |
| 125 public function InitData($_postdata) { | 123 public function InitData($_postdata) { |
| 126 $file_id = $_postdata['fileId']; | 124 $file_id = $_postdata['fileId']; |
| 127 $branch_id = $_postdata['branchId']; | 125 $branch_id = $_postdata['branchId']; |
| 192 | 190 |
| 193 } | 191 } |
| 194 */ | 192 */ |
| 195 | 193 |
| 196 | 194 |
| 195 // TODO: comparison not correct | |
| 197 private function Taglist_infileUpToDate($taglistArray) { | 196 private function Taglist_infileUpToDate($taglistArray) { |
| 198 // compare $this->taglist_infile is the same as $taglistArray | 197 // compare $this->taglist_infile is the same as $taglistArray |
| 199 $taglist_infile = $this->taglist_infile; | 198 $taglist_infile = $this->taglist_infile; |
| 200 if (count($taglist_infile) != count($taglistArray)) { | 199 if (count($taglist_infile) != count($taglistArray)) { |
| 201 return false; | 200 return false; |
| 202 } | 201 } |
| 203 | 202 |
| 204 foreach ($taglistArray as $row_indb) { | 203 foreach ($taglistArray as $row_indb) { |
| 204 $cnt = 0; | |
| 205 foreach ($taglist_infile as $row) { | 205 foreach ($taglist_infile as $row) { |
| 206 //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) | 206 //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) |
| 207 if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { | 207 if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { |
| 208 | 208 |
| 209 } else { | 209 } else { |
| 210 return false; | 210 $cnt ++; |
| 211 } | 211 } |
| 212 } | |
| 213 if ($cnt == count($row)) { | |
| 214 return false; | |
| 212 } | 215 } |
| 213 } | 216 } |
| 214 return true; | 217 return true; |
| 215 } | 218 } |
| 216 // === for tagging === | 219 // === for tagging === |
| 223 | 226 |
| 224 //$taglistArray = $this->GetTaglistArray(); | 227 //$taglistArray = $this->GetTaglistArray(); |
| 225 //for GetTaglistByTopicID: | 228 //for GetTaglistByTopicID: |
| 226 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); | 229 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); |
| 227 | 230 |
| 231 $data['taglist_infile'] = $this->taglist_infile; | |
| 228 // TODO: check if taglist_infile is up-to-date | 232 // TODO: check if taglist_infile is up-to-date |
| 233 /* | |
| 229 if ( !$this->Taglist_infileUpToDate($taglistArray)) { | 234 if ( !$this->Taglist_infileUpToDate($taglistArray)) { |
| 230 $data['taglist_infile'] = $this->taglist_infile; | 235 $data['taglist_infile'] = $this->taglist_infile; |
| 231 } | 236 } else { |
| 232 | 237 $data['taglist_infile'] = ""; |
| 238 } | |
| 239 */ | |
| 240 | |
| 241 // book_meta | |
| 242 $data['book_meta'] = $this->book_meta; | |
| 233 | 243 |
| 234 // topic list | 244 // topic list |
| 235 $topiclistArray = $this->GetTopiclistArray(); | 245 $topiclistArray = $this->GetTopiclistArray(); |
| 236 | 246 |
| 237 | 247 |
| 336 | 346 |
| 337 } | 347 } |
| 338 | 348 |
| 339 | 349 |
| 340 } | 350 } |
| 351 private function AppendMetaData($text_content) { | |
| 352 $text = '<?xml version="1.0" encoding="UTF-8"?>'; | |
| 353 $text .= "\n<text>\n"; | |
| 354 // topic | |
| 355 $text .= "<topic>".$this->topic."</topic>\n"; | |
| 356 // book meta data | |
| 357 $book = $this->book_meta; | |
| 358 foreach ($book as $b) { | |
| 359 $text .= "<book>\n"; | |
| 360 $text .= "<title>".$b[0]."</title>\n"; | |
| 361 $text .= "<author>".$b[1]."</author>\n"; | |
| 362 $text .= "<year>".$b[2]."</year>\n"; | |
| 363 $text .= "<pagenumber>".$b[3]."</pagenumber>\n"; | |
| 364 $text .= "</book>\n"; | |
| 365 } | |
| 366 // taglist | |
| 367 $taglist = $this->taglist_infile; | |
| 368 foreach ($taglist as $tagitem) { | |
| 369 $text .= "<tagitem>\n"; | |
| 370 $text .= "<id>".$tagitem[0]."</id>\n"; | |
| 371 $text .= "<name>".$tagitem[1]."</name>\n"; | |
| 372 $text .= "<tag>".$tagitem[2]."</tag>\n"; | |
| 373 $text .= "<color>".$tagitem[3]."</color>\n"; | |
| 374 $text .= "</tagitem>\n"; | |
| 375 } | |
| 376 | |
| 377 // text_content | |
| 378 $text .= $text_content; | |
| 379 $text .= "\n</text>"; | |
| 380 | |
| 381 return $text; | |
| 382 } | |
| 341 public function SaveFullTextToLGService($_postdata) { | 383 public function SaveFullTextToLGService($_postdata) { |
| 342 // save tagged text (full text) by Jorge's API to lg service | 384 // save tagged text (full text) by Jorge's API to lg service |
| 343 | 385 |
| 344 // -------- | 386 // -------- |
| 345 if ($_postdata['text']){ | 387 if ($_postdata['text']){ |
| 353 if (get_magic_quotes_gpc()) { | 395 if (get_magic_quotes_gpc()) { |
| 354 $require = stripslashes($_postdata['text']); | 396 $require = stripslashes($_postdata['text']); |
| 355 } else { | 397 } else { |
| 356 $require = $_postdata['text']; | 398 $require = $_postdata['text']; |
| 357 } | 399 } |
| 358 | 400 |
| 359 $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require); | 401 $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require); |
| 360 $require = preg_replace('/&/u', "&", $require); | 402 $require = preg_replace('/&/u', "&", $require); |
| 361 $require = preg_replace("/○/u", " ", $require); | 403 $require = preg_replace("/○/u", " ", $require); |
| 362 $require = preg_replace("/<br>/u", "\n", $require); | 404 $require = preg_replace("/<br>/u", "\n", $require); |
| 363 $require = preg_replace("/<br>/u", "\n", $require); | 405 //$require = preg_replace("/<br>/u", "\n", $require); |
| 406 | |
| 407 // TODO: append metadata at the beginning of file | |
| 408 if ($_postdata['branchId'] == 0) { | |
| 409 // -- new branch case | |
| 410 $require = "<text_content>\n".$require."</text_content>\n"; | |
| 411 } | |
| 412 $require = $this->AppendMetaData($require); | |
| 413 | |
| 364 //saving in my local machine in developing phrase | 414 //saving in my local machine in developing phrase |
| 365 file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require); | 415 file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require); |
| 366 } | 416 } |
| 367 | 417 |
| 368 // ------ | 418 // ------ |
| 407 curl_setopt($ch, CURLOPT_HTTPHEADER, array( | 457 curl_setopt($ch, CURLOPT_HTTPHEADER, array( |
| 408 'Content-type: multipart/form-data;charset=utf-8' | 458 'Content-type: multipart/form-data;charset=utf-8' |
| 409 )); | 459 )); |
| 410 | 460 |
| 411 // execute the request | 461 // execute the request |
| 412 $output = curl_exec($ch); | 462 // **** commended to DEBUG *** |
| 413 | 463 //$output = curl_exec($ch); |
| 464 // ***** | |
| 414 | 465 |
| 415 // output the profile information - includes the header | 466 // output the profile information - includes the header |
| 416 //echo($output) . PHP_EOL; | 467 //echo($output) . PHP_EOL; |
| 417 | 468 |
| 418 // close curl resource to free up system resources | 469 // close curl resource to free up system resources |
| 1145 | 1196 |
| 1146 private function ParseMetaData($filename) { | 1197 private function ParseMetaData($filename) { |
| 1147 $text = file_get_contents($filename); | 1198 $text = file_get_contents($filename); |
| 1148 $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); | 1199 $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); |
| 1149 | 1200 |
| 1150 $this->topic = $xml->topic; // set topic id | 1201 $this->topic = (string)$xml->topic; // set topic id |
| 1151 | 1202 |
| 1152 // get taglist in file | 1203 // get taglist in file |
| 1153 $taglist_infile = $xml->taglist; | 1204 $taglist_infile = $xml->tagitem; |
| 1154 $taglistArray = array(); | 1205 $taglistArray = array(); |
| 1155 foreach ($taglist_infile as $row) { | 1206 foreach ($taglist_infile as $row) { |
| 1156 array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color)); | 1207 array_push($taglistArray, array((string)$row->id,(string)$row->name,(string)$row->tag,(string)$row->color )); |
| 1157 } | 1208 } |
| 1158 $this->taglist_infile = $taglistArray; | 1209 if ($taglistArray) { |
| 1159 | 1210 $this->taglist_infile = $taglistArray; |
| 1160 // TODO: get meta data of book | 1211 } |
| 1161 $this->book_meta = $xml->book; | 1212 |
| 1213 // get book meta data | |
| 1214 $book_meta = $xml->book; | |
| 1215 $book_metaArray = array(); | |
| 1216 foreach ($book_meta as $row) { | |
| 1217 //array_push($book_metaArray, array('title'=>(string)$row->title,'author'=>(string)$row->author,'year'=>(string)$row->year,'pagenumber'=>(string)$row->pagenumber )); | |
| 1218 array_push($book_metaArray, array((string)$row->title,(string)$row->author,(string)$row->year,(string)$row->pagenumber )); | |
| 1219 } | |
| 1220 if ($book_metaArray) { | |
| 1221 $this->book_meta = $book_metaArray; | |
| 1222 } | |
| 1162 | 1223 |
| 1163 // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; | 1224 // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; |
| 1164 // --- detect if the taglist set is up-to-date or not --- | 1225 // --- detect if the taglist set is up-to-date or not --- |
| 1165 | 1226 $contentString = (string)$xml->text_content->asXML(); |
| 1166 | 1227 //$removed_str = array("<text_content>","</text_content>"); |
| 1167 $contentString = $xml->text_content->asXML(); | 1228 //$new_contentString = str_replace($removed_str, "", $contentString); |
| 1168 | 1229 |
| 1169 return $contentString; | 1230 return $contentString; |
| 1170 } | 1231 } |
| 1171 private function GetSectionContent() { | 1232 private function GetSectionContent() { |
| 1172 $section_id = $this->GetSectionId(); | 1233 $section_id = $this->GetSectionId(); |
| 1178 | 1239 |
| 1179 $contentString=""; | 1240 $contentString=""; |
| 1180 $data_path = $this->GetDataPath(); | 1241 $data_path = $this->GetDataPath(); |
| 1181 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { | 1242 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { |
| 1182 $filename = $data_path."parsing_files/".$section_id.".txt"; | 1243 $filename = $data_path."parsing_files/".$section_id.".txt"; |
| 1183 // --- parsing meta data | 1244 |
| 1245 // --- parsing meta data | |
| 1184 $stringInput = $this->ParseMetaData($filename); | 1246 $stringInput = $this->ParseMetaData($filename); |
| 1185 // ---- | 1247 // ---- |
| 1186 | 1248 |
| 1187 // if the text is from file system | 1249 // if the text is from file system |
| 1188 $stringInput = preg_replace("/ /u", "○", $stringInput); | 1250 $stringInput = preg_replace("/ /u", "○", $stringInput); |
