comparison models/extractapp.php @ 74:60b5a94163c3 extractapp

New: retrieve book meta data from LGService
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Thu, 09 Apr 2015 10:59:31 +0200
parents 2daef8e36214
children 75233b09c580
comparison
equal deleted inserted replaced
73:2daef8e36214 74:60b5a94163c3
30 30
31 // get from URL with file_id 31 // get from URL with file_id
32 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; 32 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id;
33 //$lg_text = file_get_contents($lg_text_url); 33 //$lg_text = file_get_contents($lg_text_url);
34 // --- get/set text and parsing meta data from text file 34 // --- get/set text and parsing meta data from text file
35 $lg_text = $this->ParseMetaData($lg_text_url); 35 $lg_text = $this->ParseDataInFile($lg_text_url);
36 // TODO: maybe need to check if the book_meta is updated. compare the book_meta in text file and from _post (most up-to-date)
37
38 // ----
39 36
40 37
41 $stringInput = $lg_text; 38 $stringInput = $lg_text;
42 $stringInput = preg_replace("/ /u", "○", $stringInput); 39 $stringInput = preg_replace("/ /u", "○", $stringInput);
43 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); 40 $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
93 $lg_text = $stringInput; 90 $lg_text = $stringInput;
94 91
95 $this->lg_text = $lg_text; 92 $this->lg_text = $lg_text;
96 } 93 }
97 94
98 public function GetTextFromLocal($_id){ 95
96 // This is only used on localhost
97 public function GetTextFromLocal($_id){
99 $this->section_id = $_id; 98 $this->section_id = $_id;
100 //$this->branch_id = 1; // local test sets branch_id to 1 99 //$this->branch_id = 1; // local test sets branch_id to 1
101 $this->messages['debug'] .= "[Debug] from my local"."<br>"; 100 $this->messages['debug'] .= "[Debug] from my local"."<br>";
102 $this->lg_text = $this->GetSectionContent(); 101 $this->lg_text = $this->GetSectionContent();
103 102 }
104 } 103
105 104
106 public function GetInfoFromPreviousPage($_postdata) { 105 public function GetInfoFromPreviousPage($_postdata) {
107 106
108 if (isset($_postdata['fileId'])) { 107 if (isset($_postdata['fileId'])) {
109 $this->file_id = $_postdata['fileId']; 108 $this->file_id = $_postdata['fileId'];
1119 1118
1120 1119
1121 return $data; 1120 return $data;
1122 } 1121 }
1123 1122
1124 private function ParseMetaData($filename) { 1123 private function ParseDataInFile($filename) {
1125 $text = file_get_contents($filename); 1124 $text = file_get_contents($filename);
1126 $xml = simplexml_load_string($text); //or die("Error: Cannot load from xml string"); 1125 $xml = simplexml_load_string($text); //or die("Error: Cannot load from xml string");
1127 if (!$xml) { 1126 if (!$xml) {
1128 // when file created by section_id, read the plain text from file_get_contents 1127 // when file created by section_id, read the plain text from file_get_contents
1129 return $text; 1128 return $text;
1139 } 1138 }
1140 if ($taglistArray) { 1139 if ($taglistArray) {
1141 $this->taglist_infile = $taglistArray; 1140 $this->taglist_infile = $taglistArray;
1142 } 1141 }
1143 1142
1144 // get book meta data from file 1143 // --- get book meta data from file, replaced by SetBookMetaDataBySectionId(), which retrieves book meta data from LGService
1144 /*
1145 $book_meta = $xml->book; 1145 $book_meta = $xml->book;
1146 $book_metaArray = array(); 1146 $book_metaArray = array();
1147 foreach ($book_meta as $row) { 1147 foreach ($book_meta as $row) {
1148 array_push($book_metaArray, array((string)$row->id,(string)$row->name,(string)$row->level1,(string)$row->level2, 1148 array_push($book_metaArray, array((string)$row->id,(string)$row->name,(string)$row->level1,(string)$row->level2,
1149 (string)$row->period,(string)$row->dynasty,(string)$row->start_year,(string)$row->end_year,(string)$row->line, 1149 (string)$row->period,(string)$row->dynasty,(string)$row->start_year,(string)$row->end_year,(string)$row->line,
1150 (string)$row->volume,(string)$row->author,(string)$row->edition,(string)$row->in_jibengujiku,(string)$row->admin_type )); 1150 (string)$row->volume,(string)$row->author,(string)$row->edition,(string)$row->in_jibengujiku,(string)$row->admin_type ));
1151 } 1151 }
1152 if ($book_metaArray) { 1152 if ($book_metaArray) {
1153 $this->book_meta = $book_metaArray; 1153 $this->book_meta = $book_metaArray;
1154 } 1154 }
1155 1155 */
1156 // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; 1156 // -----
1157 // --- detect if the taglist set is up-to-date or not ---
1158 1157
1159 $contentString = (string)($xml->text_content->asXML()); 1158 $contentString = (string)($xml->text_content->asXML());
1160 //$removed_str = array("<text_content>","</text_content>");
1161 //$new_contentString = str_replace($removed_str, "", $contentString);
1162 1159
1163 return $contentString; 1160 return $contentString;
1164 } 1161 }
1165 private function GetSectionContent() { 1162
1163
1164 private function GetSectionContent() { // called only by GetTextFromLocal()
1166 $section_id = $this->GetSectionId(); 1165 $section_id = $this->GetSectionId();
1167 $section_info = $this->GetSectionInfo(); 1166 $section_info = $this->GetSectionInfo();
1168 1167
1169 $bookId = $section_info['bookId']; 1168 $bookId = $section_info['bookId'];
1170 $startPage = $section_info['startPage']; 1169 $startPage = $section_info['startPage'];
1174 $data_path = $this->GetDataPath(); 1173 $data_path = $this->GetDataPath();
1175 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { 1174 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) {
1176 $filename = $data_path."parsing_files/".$section_id.".txt"; 1175 $filename = $data_path."parsing_files/".$section_id.".txt";
1177 1176
1178 // --- parsing meta data 1177 // --- parsing meta data
1179 $stringInput = $this->ParseMetaData($filename); 1178 $stringInput = $this->ParseDataInFile($filename);
1180 // ---- 1179 // ----
1181 1180
1182 // if the text is from file system 1181 // if the text is from file system
1183 $stringInput = preg_replace("/ /u", "○", $stringInput); 1182 $stringInput = preg_replace("/ /u", "○", $stringInput);
1184 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); 1183 $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
1203 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); 1202 $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
1204 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); 1203 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);
1205 } 1204 }
1206 1205
1207 /* 1206 /*
1208 // get book_meta from books table in db 1207 // get book_meta from books table in db on localhost
1209 $book_meta = array(); 1208 $book_meta = array();
1210 $books_result = $this->GetBooksByID($bookId); 1209 $books_result = $this->GetBooksByID($bookId);
1211 while ($row = mysql_fetch_assoc($books_result)) { 1210 while ($row = mysql_fetch_assoc($books_result)) {
1212 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'],(string)$row['dynasty'])); 1211 array_push($book_meta, array($row['id'],$row['name'],$row['author'],(string)$row['start_year'],(string)$row['line'],(string)$row['dynasty']));
1213 // use 'start_year' as year, 'line' is pagenumber 1212 // use 'start_year' as year, 'line' is pagenumber
1214 } 1213 }
1215 $this->book_meta = $book_meta; 1214 $this->book_meta = $book_meta;
1216 */ 1215 */
1217
1218 $this->SetBookMetaDataBySectionId(); // get book_meta by section_id from LGServices and set $this->book_meta
1219 1216
1220 return $stringInput; 1217 return $stringInput;
1221 } 1218 }
1222 1219
1223 private function GetDataPath() { 1220 private function GetDataPath() {