Mercurial > hg > extraction-interface
comparison models/extractapp.php @ 51:840cdb52f476 extractapp
add book_meta into file
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 26 Mar 2015 15:42:50 +0100 |
parents | c98a0c6d7eb4 |
children | 94064f625650 |
comparison
equal
deleted
inserted
replaced
50:3e42a63de0ea | 51:840cdb52f476 |
---|---|
31 $this->book_name = $_postdata['bookName']; | 31 $this->book_name = $_postdata['bookName']; |
32 | 32 |
33 // get from URL with file_id | 33 // get from URL with file_id |
34 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; | 34 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; |
35 //$lg_text = file_get_contents($lg_text_url); | 35 //$lg_text = file_get_contents($lg_text_url); |
36 // --- parsing meta data | 36 // --- get/set text and parsing meta data from text file |
37 $lg_text = $this->ParseMetaData($lg_text_url); | 37 $lg_text = $this->ParseMetaData($lg_text_url); |
38 // TODO: maybe need to check if the book_meta is updated. compare the book_meta in text file and from _post (most up-to-date) | |
38 | 39 |
39 // ---- | 40 // ---- |
40 | 41 |
41 | 42 |
42 $stringInput = $lg_text; | 43 $stringInput = $lg_text; |
59 $this->book_name = $_postdata['bookName']; | 60 $this->book_name = $_postdata['bookName']; |
60 | 61 |
61 // get from URL with file_id | 62 // get from URL with file_id |
62 $lg_text_url = $this->get_text_from_sectionId_url.$section_id; | 63 $lg_text_url = $this->get_text_from_sectionId_url.$section_id; |
63 $lg_text = file_get_contents($lg_text_url); | 64 $lg_text = file_get_contents($lg_text_url); |
65 | |
66 // TODO: get book_meta from $_postdata and set $this->book_meta: book_id,book_name,author,year,pagenumber | |
67 $book_meta = array(); | |
68 array_push($book_meta, array($this->book_id,$this->book_name,"","","")); // missing author,year,pagenumber | |
69 | |
70 $this->book_meta = $book_meta; | |
71 | |
72 | |
64 | 73 |
65 $stringInput = $lg_text; | 74 $stringInput = $lg_text; |
66 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); | 75 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); |
67 $stringInput = preg_replace("/ /u", "○", $stringInput); | 76 $stringInput = preg_replace("/ /u", "○", $stringInput); |
68 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 77 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
355 $text .= "<topic>".$this->topic."</topic>\n"; | 364 $text .= "<topic>".$this->topic."</topic>\n"; |
356 // book meta data | 365 // book meta data |
357 $book = $this->book_meta; | 366 $book = $this->book_meta; |
358 foreach ($book as $b) { | 367 foreach ($book as $b) { |
359 $text .= "<book>\n"; | 368 $text .= "<book>\n"; |
360 $text .= "<title>".$b[0]."</title>\n"; | 369 $text .= "<id>".$b[0]."</id>\n"; |
361 $text .= "<author>".$b[1]."</author>\n"; | 370 $text .= "<name>".$b[1]."</name>\n"; |
362 $text .= "<year>".$b[2]."</year>\n"; | 371 $text .= "<author>".$b[2]."</author>\n"; |
363 $text .= "<pagenumber>".$b[3]."</pagenumber>\n"; | 372 $text .= "<year>".$b[3]."</year>\n"; |
373 $text .= "<pagenumber>".$b[4]."</pagenumber>\n"; | |
364 $text .= "</book>\n"; | 374 $text .= "</book>\n"; |
365 } | 375 } |
366 // taglist | 376 // taglist |
367 $taglist = $this->taglist_infile; | 377 $taglist = $this->taglist_infile; |
368 foreach ($taglist as $tagitem) { | 378 foreach ($taglist as $tagitem) { |
380 | 390 |
381 return $text; | 391 return $text; |
382 } | 392 } |
383 public function SaveFullTextToLGService($_postdata) { | 393 public function SaveFullTextToLGService($_postdata) { |
384 // save tagged text (full text) by Jorge's API to lg service | 394 // save tagged text (full text) by Jorge's API to lg service |
385 | |
386 // -------- | 395 // -------- |
387 if ($_postdata['text']){ | 396 if ($_postdata['text']){ |
388 $date = date('Y_m_d_H_i_s', time()); | 397 $date = date('Y_m_d_H_i_s', time()); |
389 if ( file_exists("data/parsing_files/".$_postdata['sectionId'].".txt") ) { | 398 if ( file_exists("data/parsing_files/".$_postdata['sectionId'].".txt") ) { |
390 $oldFile = file_get_contents("data/parsing_files/".$_postdata['sectionId'].".txt"); | 399 $oldFile = file_get_contents("data/parsing_files/".$_postdata['sectionId'].".txt"); |
706 $result = mysql_query($query); | 715 $result = mysql_query($query); |
707 if (!$result) { | 716 if (!$result) { |
708 echo json_encode(mysql_error()); | 717 echo json_encode(mysql_error()); |
709 } | 718 } |
710 $row = mysql_fetch_assoc($result); | 719 $row = mysql_fetch_assoc($result); |
711 $largest_id = $row['AUTO_INCREMENT']; | 720 $largest_id = $row['AUTO_INCREMENT']-1; |
712 | 721 |
713 | 722 |
714 $topic_id = $_postdata['topic_id']; | 723 $topic_id = $_postdata['topic_id']; |
715 $result = $this->GetTaglistByTopicID($topic_id); | 724 $result = $this->GetTaglistByTopicID($topic_id); |
716 | 725 |
1215 } | 1224 } |
1216 if ($taglistArray) { | 1225 if ($taglistArray) { |
1217 $this->taglist_infile = $taglistArray; | 1226 $this->taglist_infile = $taglistArray; |
1218 } | 1227 } |
1219 | 1228 |
1220 // get book meta data | 1229 // get book meta data from file |
1221 $book_meta = $xml->book; | 1230 $book_meta = $xml->book; |
1222 $book_metaArray = array(); | 1231 $book_metaArray = array(); |
1223 foreach ($book_meta as $row) { | 1232 foreach ($book_meta as $row) { |
1224 //array_push($book_metaArray, array('title'=>(string)$row->title,'author'=>(string)$row->author,'year'=>(string)$row->year,'pagenumber'=>(string)$row->pagenumber )); | 1233 array_push($book_metaArray, array((string)$row->id,(string)$row->name,(string)$row->author,(string)$row->year,(string)$row->pagenumber )); |
1225 array_push($book_metaArray, array((string)$row->title,(string)$row->author,(string)$row->year,(string)$row->pagenumber )); | |
1226 } | 1234 } |
1227 if ($book_metaArray) { | 1235 if ($book_metaArray) { |
1228 $this->book_meta = $book_metaArray; | 1236 $this->book_meta = $book_metaArray; |
1229 } | 1237 } |
1230 | 1238 |
1272 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); | 1280 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); |
1273 $stringInput = preg_replace("/ /u", "○", $stringInput); | 1281 $stringInput = preg_replace("/ /u", "○", $stringInput); |
1274 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 1282 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
1275 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); | 1283 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); |
1276 } | 1284 } |
1285 | |
1286 // get book_meta from books table | |
1287 $book_meta = array(); | |
1288 $books_result = $this->GetBooksByID($bookId); | |
1289 while ($row = mysql_fetch_assoc($books_result)) { | |
1290 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'])); | |
1291 // use 'start_year' as year, 'line' is pagenumber | |
1292 } | |
1293 | |
1294 $this->book_meta = $book_meta; | |
1295 | |
1277 | 1296 |
1278 return $stringInput; | 1297 return $stringInput; |
1279 } | 1298 } |
1280 | 1299 |
1281 private function GetDataPath() { | 1300 private function GetDataPath() { |