Mercurial > hg > extraction-interface
comparison models/extractapp.php @ 51:840cdb52f476 extractapp
add book_meta into file
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Thu, 26 Mar 2015 15:42:50 +0100 |
| parents | c98a0c6d7eb4 |
| children | 94064f625650 |
comparison legend:
- equal
- deleted
- inserted
- replaced
| 50:3e42a63de0ea | 51:840cdb52f476 |
|---|---|
| 31 $this->book_name = $_postdata['bookName']; | 31 $this->book_name = $_postdata['bookName']; |
| 32 | 32 |
| 33 // get from URL with file_id | 33 // get from URL with file_id |
| 34 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; | 34 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; |
| 35 //$lg_text = file_get_contents($lg_text_url); | 35 //$lg_text = file_get_contents($lg_text_url); |
| 36 // --- parsing meta data | 36 // --- get/set text and parsing meta data from text file |
| 37 $lg_text = $this->ParseMetaData($lg_text_url); | 37 $lg_text = $this->ParseMetaData($lg_text_url); |
| 38 // TODO: maybe need to check if the book_meta is updated. compare the book_meta in text file and from _post (most up-to-date) | |
| 38 | 39 |
| 39 // ---- | 40 // ---- |
| 40 | 41 |
| 41 | 42 |
| 42 $stringInput = $lg_text; | 43 $stringInput = $lg_text; |
| 59 $this->book_name = $_postdata['bookName']; | 60 $this->book_name = $_postdata['bookName']; |
| 60 | 61 |
| 61 // get from URL with file_id | 62 // get from URL with file_id |
| 62 $lg_text_url = $this->get_text_from_sectionId_url.$section_id; | 63 $lg_text_url = $this->get_text_from_sectionId_url.$section_id; |
| 63 $lg_text = file_get_contents($lg_text_url); | 64 $lg_text = file_get_contents($lg_text_url); |
| 65 | |
| 66 // TODO: get book_meta from $_postdata and set $this->book_meta: book_id,book_name,author,year,pagenumber | |
| 67 $book_meta = array(); | |
| 68 array_push($book_meta, array($this->book_id,$this->book_name,"","","")); // missing author,year,pagenumber | |
| 69 | |
| 70 $this->book_meta = $book_meta; | |
| 71 | |
| 72 | |
| 64 | 73 |
| 65 $stringInput = $lg_text; | 74 $stringInput = $lg_text; |
| 66 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); | 75 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); |
| 67 $stringInput = preg_replace("/ /u", "○", $stringInput); | 76 $stringInput = preg_replace("/ /u", "○", $stringInput); |
| 68 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 77 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
| 355 $text .= "<topic>".$this->topic."</topic>\n"; | 364 $text .= "<topic>".$this->topic."</topic>\n"; |
| 356 // book meta data | 365 // book meta data |
| 357 $book = $this->book_meta; | 366 $book = $this->book_meta; |
| 358 foreach ($book as $b) { | 367 foreach ($book as $b) { |
| 359 $text .= "<book>\n"; | 368 $text .= "<book>\n"; |
| 360 $text .= "<title>".$b[0]."</title>\n"; | 369 $text .= "<id>".$b[0]."</id>\n"; |
| 361 $text .= "<author>".$b[1]."</author>\n"; | 370 $text .= "<name>".$b[1]."</name>\n"; |
| 362 $text .= "<year>".$b[2]."</year>\n"; | 371 $text .= "<author>".$b[2]."</author>\n"; |
| 363 $text .= "<pagenumber>".$b[3]."</pagenumber>\n"; | 372 $text .= "<year>".$b[3]."</year>\n"; |
| 373 $text .= "<pagenumber>".$b[4]."</pagenumber>\n"; | |
| 364 $text .= "</book>\n"; | 374 $text .= "</book>\n"; |
| 365 } | 375 } |
| 366 // taglist | 376 // taglist |
| 367 $taglist = $this->taglist_infile; | 377 $taglist = $this->taglist_infile; |
| 368 foreach ($taglist as $tagitem) { | 378 foreach ($taglist as $tagitem) { |
| 380 | 390 |
| 381 return $text; | 391 return $text; |
| 382 } | 392 } |
| 383 public function SaveFullTextToLGService($_postdata) { | 393 public function SaveFullTextToLGService($_postdata) { |
| 384 // save tagged text (full text) by Jorge's API to lg service | 394 // save tagged text (full text) by Jorge's API to lg service |
| 385 | |
| 386 // -------- | 395 // -------- |
| 387 if ($_postdata['text']){ | 396 if ($_postdata['text']){ |
| 388 $date = date('Y_m_d_H_i_s', time()); | 397 $date = date('Y_m_d_H_i_s', time()); |
| 389 if ( file_exists("data/parsing_files/".$_postdata['sectionId'].".txt") ) { | 398 if ( file_exists("data/parsing_files/".$_postdata['sectionId'].".txt") ) { |
| 390 $oldFile = file_get_contents("data/parsing_files/".$_postdata['sectionId'].".txt"); | 399 $oldFile = file_get_contents("data/parsing_files/".$_postdata['sectionId'].".txt"); |
| 706 $result = mysql_query($query); | 715 $result = mysql_query($query); |
| 707 if (!$result) { | 716 if (!$result) { |
| 708 echo json_encode(mysql_error()); | 717 echo json_encode(mysql_error()); |
| 709 } | 718 } |
| 710 $row = mysql_fetch_assoc($result); | 719 $row = mysql_fetch_assoc($result); |
| 711 $largest_id = $row['AUTO_INCREMENT']; | 720 $largest_id = $row['AUTO_INCREMENT']-1; |
| 712 | 721 |
| 713 | 722 |
| 714 $topic_id = $_postdata['topic_id']; | 723 $topic_id = $_postdata['topic_id']; |
| 715 $result = $this->GetTaglistByTopicID($topic_id); | 724 $result = $this->GetTaglistByTopicID($topic_id); |
| 716 | 725 |
| 1215 } | 1224 } |
| 1216 if ($taglistArray) { | 1225 if ($taglistArray) { |
| 1217 $this->taglist_infile = $taglistArray; | 1226 $this->taglist_infile = $taglistArray; |
| 1218 } | 1227 } |
| 1219 | 1228 |
| 1220 // get book meta data | 1229 // get book meta data from file |
| 1221 $book_meta = $xml->book; | 1230 $book_meta = $xml->book; |
| 1222 $book_metaArray = array(); | 1231 $book_metaArray = array(); |
| 1223 foreach ($book_meta as $row) { | 1232 foreach ($book_meta as $row) { |
| 1224 //array_push($book_metaArray, array('title'=>(string)$row->title,'author'=>(string)$row->author,'year'=>(string)$row->year,'pagenumber'=>(string)$row->pagenumber )); | 1233 array_push($book_metaArray, array((string)$row->id,(string)$row->name,(string)$row->author,(string)$row->year,(string)$row->pagenumber )); |
| 1225 array_push($book_metaArray, array((string)$row->title,(string)$row->author,(string)$row->year,(string)$row->pagenumber )); | |
| 1226 } | 1234 } |
| 1227 if ($book_metaArray) { | 1235 if ($book_metaArray) { |
| 1228 $this->book_meta = $book_metaArray; | 1236 $this->book_meta = $book_metaArray; |
| 1229 } | 1237 } |
| 1230 | 1238 |
| 1272 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); | 1280 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); |
| 1273 $stringInput = preg_replace("/ /u", "○", $stringInput); | 1281 $stringInput = preg_replace("/ /u", "○", $stringInput); |
| 1274 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 1282 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
| 1275 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); | 1283 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); |
| 1276 } | 1284 } |
| 1285 | |
| 1286 // get book_meta from books table | |
| 1287 $book_meta = array(); | |
| 1288 $books_result = $this->GetBooksByID($bookId); | |
| 1289 while ($row = mysql_fetch_assoc($books_result)) { | |
| 1290 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'])); | |
| 1291 // use 'start_year' as year, 'line' is pagenumber | |
| 1292 } | |
| 1293 | |
| 1294 $this->book_meta = $book_meta; | |
| 1295 | |
| 1277 | 1296 |
| 1278 return $stringInput; | 1297 return $stringInput; |
| 1279 } | 1298 } |
| 1280 | 1299 |
| 1281 private function GetDataPath() { | 1300 private function GetDataPath() { |
