Mercurial > hg > extraction-interface
comparison models/extractapp.php @ 74:60b5a94163c3 extractapp
New: retrieve book meta data from LGService
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 09 Apr 2015 10:59:31 +0200 |
parents | 2daef8e36214 |
children | 75233b09c580 |
comparison
equal
deleted
inserted
replaced
73:2daef8e36214 | 74:60b5a94163c3 |
---|---|
30 | 30 |
31 // get from URL with file_id | 31 // get from URL with file_id |
32 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; | 32 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; |
33 //$lg_text = file_get_contents($lg_text_url); | 33 //$lg_text = file_get_contents($lg_text_url); |
34 // --- get/set text and parsing meta data from text file | 34 // --- get/set text and parsing meta data from text file |
35 $lg_text = $this->ParseMetaData($lg_text_url); | 35 $lg_text = $this->ParseDataInFile($lg_text_url); |
36 // TODO: maybe need to check if the book_meta is updated. compare the book_meta in text file and from _post (most up-to-date) | |
37 | |
38 // ---- | |
39 | 36 |
40 | 37 |
41 $stringInput = $lg_text; | 38 $stringInput = $lg_text; |
42 $stringInput = preg_replace("/ /u", "○", $stringInput); | 39 $stringInput = preg_replace("/ /u", "○", $stringInput); |
43 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 40 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
93 $lg_text = $stringInput; | 90 $lg_text = $stringInput; |
94 | 91 |
95 $this->lg_text = $lg_text; | 92 $this->lg_text = $lg_text; |
96 } | 93 } |
97 | 94 |
98 public function GetTextFromLocal($_id){ | 95 |
96 // This is only used on localhost | |
97 public function GetTextFromLocal($_id){ | |
99 $this->section_id = $_id; | 98 $this->section_id = $_id; |
100 //$this->branch_id = 1; // local test sets branch_id to 1 | 99 //$this->branch_id = 1; // local test sets branch_id to 1 |
101 $this->messages['debug'] .= "[Debug] from my local"."<br>"; | 100 $this->messages['debug'] .= "[Debug] from my local"."<br>"; |
102 $this->lg_text = $this->GetSectionContent(); | 101 $this->lg_text = $this->GetSectionContent(); |
103 | 102 } |
104 } | 103 |
105 | 104 |
106 public function GetInfoFromPreviousPage($_postdata) { | 105 public function GetInfoFromPreviousPage($_postdata) { |
107 | 106 |
108 if (isset($_postdata['fileId'])) { | 107 if (isset($_postdata['fileId'])) { |
109 $this->file_id = $_postdata['fileId']; | 108 $this->file_id = $_postdata['fileId']; |
1119 | 1118 |
1120 | 1119 |
1121 return $data; | 1120 return $data; |
1122 } | 1121 } |
1123 | 1122 |
1124 private function ParseMetaData($filename) { | 1123 private function ParseDataInFile($filename) { |
1125 $text = file_get_contents($filename); | 1124 $text = file_get_contents($filename); |
1126 $xml = simplexml_load_string($text); //or die("Error: Cannot load from xml string"); | 1125 $xml = simplexml_load_string($text); //or die("Error: Cannot load from xml string"); |
1127 if (!$xml) { | 1126 if (!$xml) { |
1128 // when file created by section_id, read the plain text from file_get_contents | 1127 // when file created by section_id, read the plain text from file_get_contents |
1129 return $text; | 1128 return $text; |
1139 } | 1138 } |
1140 if ($taglistArray) { | 1139 if ($taglistArray) { |
1141 $this->taglist_infile = $taglistArray; | 1140 $this->taglist_infile = $taglistArray; |
1142 } | 1141 } |
1143 | 1142 |
1144 // get book meta data from file | 1143 // --- get book meta data from file, replaced by SetBookMetaDataBySectionId(), which retrieves book meta data from LGService |
1144 /* | |
1145 $book_meta = $xml->book; | 1145 $book_meta = $xml->book; |
1146 $book_metaArray = array(); | 1146 $book_metaArray = array(); |
1147 foreach ($book_meta as $row) { | 1147 foreach ($book_meta as $row) { |
1148 array_push($book_metaArray, array((string)$row->id,(string)$row->name,(string)$row->level1,(string)$row->level2, | 1148 array_push($book_metaArray, array((string)$row->id,(string)$row->name,(string)$row->level1,(string)$row->level2, |
1149 (string)$row->period,(string)$row->dynasty,(string)$row->start_year,(string)$row->end_year,(string)$row->line, | 1149 (string)$row->period,(string)$row->dynasty,(string)$row->start_year,(string)$row->end_year,(string)$row->line, |
1150 (string)$row->volume,(string)$row->author,(string)$row->edition,(string)$row->in_jibengujiku,(string)$row->admin_type )); | 1150 (string)$row->volume,(string)$row->author,(string)$row->edition,(string)$row->in_jibengujiku,(string)$row->admin_type )); |
1151 } | 1151 } |
1152 if ($book_metaArray) { | 1152 if ($book_metaArray) { |
1153 $this->book_meta = $book_metaArray; | 1153 $this->book_meta = $book_metaArray; |
1154 } | 1154 } |
1155 | 1155 */ |
1156 // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; | 1156 // ----- |
1157 // --- detect if the taglist set is up-to-date or not --- | |
1158 | 1157 |
1159 $contentString = (string)($xml->text_content->asXML()); | 1158 $contentString = (string)($xml->text_content->asXML()); |
1160 //$removed_str = array("<text_content>","</text_content>"); | |
1161 //$new_contentString = str_replace($removed_str, "", $contentString); | |
1162 | 1159 |
1163 return $contentString; | 1160 return $contentString; |
1164 } | 1161 } |
1165 private function GetSectionContent() { | 1162 |
1163 | |
1164 private function GetSectionContent() { // called only by GetTextFromLocal() | |
1166 $section_id = $this->GetSectionId(); | 1165 $section_id = $this->GetSectionId(); |
1167 $section_info = $this->GetSectionInfo(); | 1166 $section_info = $this->GetSectionInfo(); |
1168 | 1167 |
1169 $bookId = $section_info['bookId']; | 1168 $bookId = $section_info['bookId']; |
1170 $startPage = $section_info['startPage']; | 1169 $startPage = $section_info['startPage']; |
1174 $data_path = $this->GetDataPath(); | 1173 $data_path = $this->GetDataPath(); |
1175 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { | 1174 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { |
1176 $filename = $data_path."parsing_files/".$section_id.".txt"; | 1175 $filename = $data_path."parsing_files/".$section_id.".txt"; |
1177 | 1176 |
1178 // --- parsing meta data | 1177 // --- parsing meta data |
1179 $stringInput = $this->ParseMetaData($filename); | 1178 $stringInput = $this->ParseDataInFile($filename); |
1180 // ---- | 1179 // ---- |
1181 | 1180 |
1182 // if the text is from file system | 1181 // if the text is from file system |
1183 $stringInput = preg_replace("/ /u", "○", $stringInput); | 1182 $stringInput = preg_replace("/ /u", "○", $stringInput); |
1184 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 1183 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
1203 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 1202 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
1204 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); | 1203 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); |
1205 } | 1204 } |
1206 | 1205 |
1207 /* | 1206 /* |
1208 // get book_meta from books table in db | 1207 // get book_meta from books table in db on localhost |
1209 $book_meta = array(); | 1208 $book_meta = array(); |
1210 $books_result = $this->GetBooksByID($bookId); | 1209 $books_result = $this->GetBooksByID($bookId); |
1211 while ($row = mysql_fetch_assoc($books_result)) { | 1210 while ($row = mysql_fetch_assoc($books_result)) { |
1212 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'],(string)$row['dynasty'])); | 1211 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'],(string)$row['dynasty'])); |
1213 // use 'start_year' as year, 'line' is pagenumber | 1212 // use 'start_year' as year, 'line' is pagenumber |
1214 } | 1213 } |
1215 $this->book_meta = $book_meta; | 1214 $this->book_meta = $book_meta; |
1216 */ | 1215 */ |
1217 | |
1218 $this->SetBookMetaDataBySectionId(); // get book_meta by section_id from LGServices and set $this->book_meta | |
1219 | 1216 |
1220 return $stringInput; | 1217 return $stringInput; |
1221 } | 1218 } |
1222 | 1219 |
1223 private function GetDataPath() { | 1220 private function GetDataPath() { |