Mercurial > hg > extraction-interface
comparison models/extractapp.php @ 74:60b5a94163c3 extractapp
New: retrieve book meta data from LGService
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Thu, 09 Apr 2015 10:59:31 +0200 |
| parents | 2daef8e36214 |
| children | 75233b09c580 |
comparison
equal
deleted
inserted
replaced
| 73:2daef8e36214 | 74:60b5a94163c3 |
|---|---|
| 30 | 30 |
| 31 // get from URL with file_id | 31 // get from URL with file_id |
| 32 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; | 32 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; |
| 33 //$lg_text = file_get_contents($lg_text_url); | 33 //$lg_text = file_get_contents($lg_text_url); |
| 34 // --- get/set text and parsing meta data from text file | 34 // --- get/set text and parsing meta data from text file |
| 35 $lg_text = $this->ParseMetaData($lg_text_url); | 35 $lg_text = $this->ParseDataInFile($lg_text_url); |
| 36 // TODO: maybe need to check if the book_meta is updated. compare the book_meta in text file and from _post (most up-to-date) | |
| 37 | |
| 38 // ---- | |
| 39 | 36 |
| 40 | 37 |
| 41 $stringInput = $lg_text; | 38 $stringInput = $lg_text; |
| 42 $stringInput = preg_replace("/ /u", "○", $stringInput); | 39 $stringInput = preg_replace("/ /u", "○", $stringInput); |
| 43 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 40 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
| 93 $lg_text = $stringInput; | 90 $lg_text = $stringInput; |
| 94 | 91 |
| 95 $this->lg_text = $lg_text; | 92 $this->lg_text = $lg_text; |
| 96 } | 93 } |
| 97 | 94 |
| 98 public function GetTextFromLocal($_id){ | 95 |
| | 96 // This is only used on localhost |
| | 97 public function GetTextFromLocal($_id){ |
| 99 $this->section_id = $_id; | 98 $this->section_id = $_id; |
| 100 //$this->branch_id = 1; // local test sets branch_id to 1 | 99 //$this->branch_id = 1; // local test sets branch_id to 1 |
| 101 $this->messages['debug'] .= "[Debug] from my local"."<br>"; | 100 $this->messages['debug'] .= "[Debug] from my local"."<br>"; |
| 102 $this->lg_text = $this->GetSectionContent(); | 101 $this->lg_text = $this->GetSectionContent(); |
| 103 | 102 } |
| 104 } | 103 |
| 105 | 104 |
| 106 public function GetInfoFromPreviousPage($_postdata) { | 105 public function GetInfoFromPreviousPage($_postdata) { |
| 107 | 106 |
| 108 if (isset($_postdata['fileId'])) { | 107 if (isset($_postdata['fileId'])) { |
| 109 $this->file_id = $_postdata['fileId']; | 108 $this->file_id = $_postdata['fileId']; |
| 1119 | 1118 |
| 1120 | 1119 |
| 1121 return $data; | 1120 return $data; |
| 1122 } | 1121 } |
| 1123 | 1122 |
| 1124 private function ParseMetaData($filename) { | 1123 private function ParseDataInFile($filename) { |
| 1125 $text = file_get_contents($filename); | 1124 $text = file_get_contents($filename); |
| 1126 $xml = simplexml_load_string($text); //or die("Error: Cannot load from xml string"); | 1125 $xml = simplexml_load_string($text); //or die("Error: Cannot load from xml string"); |
| 1127 if (!$xml) { | 1126 if (!$xml) { |
| 1128 // when file created by section_id, read the plain text from file_get_contents | 1127 // when file created by section_id, read the plain text from file_get_contents |
| 1129 return $text; | 1128 return $text; |
| 1139 } | 1138 } |
| 1140 if ($taglistArray) { | 1139 if ($taglistArray) { |
| 1141 $this->taglist_infile = $taglistArray; | 1140 $this->taglist_infile = $taglistArray; |
| 1142 } | 1141 } |
| 1143 | 1142 |
| 1144 // get book meta data from file | 1143 // --- get book meta data from file, replaced by SetBookMetaDataBySectionId(), which retrives book meta data from LGService |
| | 1144 /* |
| 1145 $book_meta = $xml->book; | 1145 $book_meta = $xml->book; |
| 1146 $book_metaArray = array(); | 1146 $book_metaArray = array(); |
| 1147 foreach ($book_meta as $row) { | 1147 foreach ($book_meta as $row) { |
| 1148 array_push($book_metaArray, array((string)$row->id,(string)$row->name,(string)$row->level1,(string)$row->level2, | 1148 array_push($book_metaArray, array((string)$row->id,(string)$row->name,(string)$row->level1,(string)$row->level2, |
| 1149 (string)$row->period,(string)$row->dynasty,(string)$row->start_year,(string)$row->end_year,(string)$row->line, | 1149 (string)$row->period,(string)$row->dynasty,(string)$row->start_year,(string)$row->end_year,(string)$row->line, |
| 1150 (string)$row->volume,(string)$row->author,(string)$row->edition,(string)$row->in_jibengujiku,(string)$row->admin_type )); | 1150 (string)$row->volume,(string)$row->author,(string)$row->edition,(string)$row->in_jibengujiku,(string)$row->admin_type )); |
| 1151 } | 1151 } |
| 1152 if ($book_metaArray) { | 1152 if ($book_metaArray) { |
| 1153 $this->book_meta = $book_metaArray; | 1153 $this->book_meta = $book_metaArray; |
| 1154 } | 1154 } |
| 1155 | 1155 */ |
| 1156 // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; | 1156 // ----- |
| 1157 // --- detect if the taglist set is up-to-date or not --- | |
| 1158 | 1157 |
| 1159 $contentString = (string)($xml->text_content->asXML()); | 1158 $contentString = (string)($xml->text_content->asXML()); |
| 1160 //$removed_str = array("<text_content>","</text_content>"); | |
| 1161 //$new_contentString = str_replace($removed_str, "", $contentString); | |
| 1162 | 1159 |
| 1163 return $contentString; | 1160 return $contentString; |
| 1164 } | 1161 } |
| 1165 private function GetSectionContent() { | 1162 |
| | 1163 |
| | 1164 private function GetSectionContent() { // called only by GetTextFromLocal() |
| 1166 $section_id = $this->GetSectionId(); | 1165 $section_id = $this->GetSectionId(); |
| 1167 $section_info = $this->GetSectionInfo(); | 1166 $section_info = $this->GetSectionInfo(); |
| 1168 | 1167 |
| 1169 $bookId = $section_info['bookId']; | 1168 $bookId = $section_info['bookId']; |
| 1170 $startPage = $section_info['startPage']; | 1169 $startPage = $section_info['startPage']; |
| 1174 $data_path = $this->GetDataPath(); | 1173 $data_path = $this->GetDataPath(); |
| 1175 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { | 1174 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { |
| 1176 $filename = $data_path."parsing_files/".$section_id.".txt"; | 1175 $filename = $data_path."parsing_files/".$section_id.".txt"; |
| 1177 | 1176 |
| 1178 // --- parsing meta data | 1177 // --- parsing meta data |
| 1179 $stringInput = $this->ParseMetaData($filename); | 1178 $stringInput = $this->ParseDataInFile($filename); |
| 1180 // ---- | 1179 // ---- |
| 1181 | 1180 |
| 1182 // if the text is from file system | 1181 // if the text is from file system |
| 1183 $stringInput = preg_replace("/ /u", "○", $stringInput); | 1182 $stringInput = preg_replace("/ /u", "○", $stringInput); |
| 1184 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 1183 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
| 1203 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 1202 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
| 1204 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); | 1203 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); |
| 1205 } | 1204 } |
| 1206 | 1205 |
| 1207 /* | 1206 /* |
| 1208 // get book_meta from books table in db | 1207 // get book_meta from books table in db on localhost |
| 1209 $book_meta = array(); | 1208 $book_meta = array(); |
| 1210 $books_result = $this->GetBooksByID($bookId); | 1209 $books_result = $this->GetBooksByID($bookId); |
| 1211 while ($row = mysql_fetch_assoc($books_result)) { | 1210 while ($row = mysql_fetch_assoc($books_result)) { |
| 1212 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'],(string)$row['dynasty'])); | 1211 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'],(string)$row['dynasty'])); |
| 1213 // use 'start_year' as year, 'line' is pagenumber | 1212 // use 'start_year' as year, 'line' is pagenumber |
| 1214 } | 1213 } |
| 1215 $this->book_meta = $book_meta; | 1214 $this->book_meta = $book_meta; |
| 1216 */ | 1215 */ |
| 1217 | |
| 1218 $this->SetBookMetaDataBySectionId(); // get book_meta by section_id from LGServices and set $this->book_meta | |
| 1219 | 1216 |
| 1220 return $stringInput; | 1217 return $stringInput; |
| 1221 } | 1218 } |
| 1222 | 1219 |
| 1223 private function GetDataPath() { | 1220 private function GetDataPath() { |
