comparison develop/models/extractapp.php @ 40:2e938dc046db extractapp

load,save xml file with topic, etc.
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 10 Mar 2015 13:46:16 +0100
parents 8347776a44fc
children 533a6c39c128
comparison
equal deleted inserted replaced
39:8347776a44fc 40:2e938dc046db
16 $this->SetSectionId($_urlvalues); 16 $this->SetSectionId($_urlvalues);
17 17
18 } 18 }
19 19
20 public function GetTextFromFileId($_postdata) { 20 public function GetTextFromFileId($_postdata) {
21 /*
22 if ($this->current_fileId != 0) {
23 $this->file_id = $this->current_fileId;
24 } else {
25 $this->file_id = $_postdata['fileId'];
26 }
27 */
28 $this->file_id = $_postdata['fileId']; 21 $this->file_id = $_postdata['fileId'];
29 $branch_id = $_postdata['branchId']; 22 $branch_id = $_postdata['branchId'];
30 $section_id = $_postdata['sectionId']; 23 $section_id = $_postdata['sectionId'];
31 24
32 $this->branch_id = $branch_id; 25 $this->branch_id = $branch_id;
33 $this->user_id = $_postdata['userId']; 26 $this->user_id = $_postdata['userId'];
34 $this->section_id = $section_id; 27 $this->section_id = $section_id;
35 28
36
37 $this->section_name = $_postdata['sectionName']; 29 $this->section_name = $_postdata['sectionName'];
38 $this->book_id = $_postdata['bookId']; 30 $this->book_id = $_postdata['bookId'];
39 $this->book_name = $_postdata['bookName']; 31 $this->book_name = $_postdata['bookName'];
40 32
41
42 // get from URL with file_id 33 // get from URL with file_id
43 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; 34 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id;
44 $lg_text = file_get_contents($lg_text_url); 35 //$lg_text = file_get_contents($lg_text_url);
36 // --- parsing meta data
37 $lg_text = $this->ParseMetaData($lg_text_url);
38
39 // ----
40
45 41
46 $stringInput = $lg_text; 42 $stringInput = $lg_text;
47 $stringInput = preg_replace("/ /u", "○", $stringInput); 43 $stringInput = preg_replace("/ /u", "○", $stringInput);
48 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); 44 $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
49 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); 45 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);
52 $this->lg_text = $lg_text; 48 $this->lg_text = $lg_text;
53 49
54 } 50 }
55 51
56 public function GetTextFromSectionId($_postdata) { 52 public function GetTextFromSectionId($_postdata) {
57
58 $section_id = $_postdata['sectionId']; 53 $section_id = $_postdata['sectionId'];
59 $this->section_id = $section_id; 54 $this->section_id = $section_id;
60 $this->user_id = $_postdata['userId']; 55 $this->user_id = $_postdata['userId'];
61 56
62 $this->section_name = $_postdata['sectionName']; 57 $this->section_name = $_postdata['sectionName'];
63 $this->book_id = $_postdata['bookId']; 58 $this->book_id = $_postdata['bookId'];
64 $this->book_name = $_postdata['bookName']; 59 $this->book_name = $_postdata['bookName'];
65 60
66 // get from URL with file_id 61 // get from URL with file_id
67 $lg_text_url = $this->get_text_from_sectionId_url.$section_id; 62 $lg_text_url = $this->get_text_from_sectionId_url.$section_id;
68
69 $lg_text = file_get_contents($lg_text_url); 63 $lg_text = file_get_contents($lg_text_url);
70 64
71 $stringInput = $lg_text; 65 $stringInput = $lg_text;
72 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); 66 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput);
73 $stringInput = preg_replace("/ /u", "○", $stringInput); 67 $stringInput = preg_replace("/ /u", "○", $stringInput);
116 $this->book_id = $_postdata['bookId']; 110 $this->book_id = $_postdata['bookId'];
117 } 111 }
118 if ($_postdata['currentFileId']) { 112 if ($_postdata['currentFileId']) {
119 $this->current_fileId = $_postdata['currentFileId']; 113 $this->current_fileId = $_postdata['currentFileId'];
120 } 114 }
121 115 if ($_postdata['taglistArray']) {
122 116 $this->taglist_infile = json_decode($_postdata['taglistArray']);
117 }
118 if ($_postdata['book_meta']) {
119 $this->book_meta = json_decode($_postdata['book_meta']);
120 }
123 121
124 } 122 }
125 public function InitData($_postdata) { 123 public function InitData($_postdata) {
126 $file_id = $_postdata['fileId']; 124 $file_id = $_postdata['fileId'];
127 $branch_id = $_postdata['branchId']; 125 $branch_id = $_postdata['branchId'];
192 190
193 } 191 }
194 */ 192 */
195 193
196 194
/**
 * Tell whether the tag list stored with the file matches a reference tag list.
 *
 * Rows in both lists are positional arrays: array(id, name, tag, color).
 * The lists are considered equal when they have the same number of rows and
 * every row of $taglistArray has a row in $this->taglist_infile whose four
 * fields are all equal (row order is ignored).
 *
 * NOTE(review): with duplicated rows in one list the size check plus the
 * one-directional lookup can still report "equal"; presumably tag ids are
 * unique, so this is not handled here.
 *
 * @param array $taglistArray reference tag list to compare against
 * @return bool true when both lists hold the same rows, false otherwise
 */
private function Taglist_infileUpToDate($taglistArray) {
    // A list that was never set (null) is treated as an empty list.
    $taglist_infile = is_array($this->taglist_infile) ? $this->taglist_infile : array();
    $taglistArray = is_array($taglistArray) ? $taglistArray : array();

    if (count($taglist_infile) != count($taglistArray)) {
        return false;
    }

    foreach ($taglistArray as $row_indb) {
        $found = false;
        foreach ($taglist_infile as $row) {
            if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) {
                $found = true;
                break;
            }
        }
        // The previous version compared a mismatch counter with count($row)
        // (the number of fields, 4) instead of the number of rows, so a
        // missing row usually went unnoticed.
        if (!$found) {
            return false;
        }
    }
    return true;
}
216 // === for tagging === 219 // === for tagging ===
223 226
224 //$taglistArray = $this->GetTaglistArray(); 227 //$taglistArray = $this->GetTaglistArray();
225 //for GetTaglistByTopicID: 228 //for GetTaglistByTopicID:
226 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); 229 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic());
227 230
231 $data['taglist_infile'] = $this->taglist_infile;
228 // TODO: check if taglist_infile is up-to-date 232 // TODO: check if taglist_infile is up-to-date
233 /*
229 if ( !$this->Taglist_infileUpToDate($taglistArray)) { 234 if ( !$this->Taglist_infileUpToDate($taglistArray)) {
230 $data['taglist_infile'] = $this->taglist_infile; 235 $data['taglist_infile'] = $this->taglist_infile;
231 } 236 } else {
232 237 $data['taglist_infile'] = "";
238 }
239 */
240
241 // book_meta
242 $data['book_meta'] = $this->book_meta;
233 243
234 // topic list 244 // topic list
235 $topiclistArray = $this->GetTopiclistArray(); 245 $topiclistArray = $this->GetTopiclistArray();
236 246
237 247
336 346
337 } 347 }
338 348
339 349
340 } 350 }
/**
 * Wrap text content in a <text> XML document preceded by its meta data.
 *
 * The document contains, in this order: the XML declaration, <topic>,
 * one <book> element per row of $this->book_meta
 * (title, author, year, pagenumber), one <tagitem> element per row of
 * $this->taglist_infile (id, name, tag, color), then $text_content as given.
 *
 * Meta data values are XML-escaped so that characters such as "&" or "<"
 * in a title or tag name do not produce a document that cannot be parsed
 * again. $text_content is inserted verbatim because it is expected to carry
 * its own markup.
 *
 * @param string $text_content already formatted content block
 * @return string the complete XML document
 */
private function AppendMetaData($text_content) {
    $text = '<?xml version="1.0" encoding="UTF-8"?>';
    $text .= "\n<text>\n";

    // topic
    $text .= "<topic>".htmlspecialchars((string)$this->topic, ENT_NOQUOTES, 'UTF-8')."</topic>\n";

    // book meta data; rows are positional, see $book_fields for the order
    $book_fields = array('title', 'author', 'year', 'pagenumber');
    $books = $this->book_meta;
    if (!is_array($books) && !($books instanceof Traversable)) {
        // nothing set yet: write no <book> elements
        $books = array();
    }
    foreach ($books as $b) {
        $text .= "<book>\n";
        foreach ($book_fields as $i => $field) {
            $value = isset($b[$i]) ? (string)$b[$i] : '';
            $text .= "<".$field.">".htmlspecialchars($value, ENT_NOQUOTES, 'UTF-8')."</".$field.">\n";
        }
        $text .= "</book>\n";
    }

    // taglist; rows are positional, see $tag_fields for the order
    $tag_fields = array('id', 'name', 'tag', 'color');
    $taglist = $this->taglist_infile;
    if (!is_array($taglist) && !($taglist instanceof Traversable)) {
        // nothing set yet: write no <tagitem> elements
        $taglist = array();
    }
    foreach ($taglist as $tagitem) {
        $text .= "<tagitem>\n";
        foreach ($tag_fields as $i => $field) {
            $value = isset($tagitem[$i]) ? (string)$tagitem[$i] : '';
            $text .= "<".$field.">".htmlspecialchars($value, ENT_NOQUOTES, 'UTF-8')."</".$field.">\n";
        }
        $text .= "</tagitem>\n";
    }

    // text_content
    $text .= $text_content;
    $text .= "\n</text>";

    return $text;
}
341 public function SaveFullTextToLGService($_postdata) { 383 public function SaveFullTextToLGService($_postdata) {
342 // save tagged text (full text) by Jorge's API to lg service 384 // save tagged text (full text) by Jorge's API to lg service
343 385
344 // -------- 386 // --------
345 if ($_postdata['text']){ 387 if ($_postdata['text']){
353 if (get_magic_quotes_gpc()) { 395 if (get_magic_quotes_gpc()) {
354 $require = stripslashes($_postdata['text']); 396 $require = stripslashes($_postdata['text']);
355 } else { 397 } else {
356 $require = $_postdata['text']; 398 $require = $_postdata['text'];
357 } 399 }
358 400
359 $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require); 401 $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require);
360 $require = preg_replace('/&amp;/u', "&", $require); 402 $require = preg_replace('/&amp;/u', "&", $require);
361 $require = preg_replace("/○/u", " ", $require); 403 $require = preg_replace("/○/u", " ", $require);
362 $require = preg_replace("/<br>/u", "\n", $require); 404 $require = preg_replace("/<br>/u", "\n", $require);
363 $require = preg_replace("/<br>/u", "\n", $require); 405 //$require = preg_replace("/<br>/u", "\n", $require);
406
407 // TODO: append metadata at the beginning of file
408 if ($_postdata['branchId'] == 0) {
409 // -- new branch case
410 $require = "<text_content>\n".$require."</text_content>\n";
411 }
412 $require = $this->AppendMetaData($require);
413
364 //saving in my local machine in developing phrase 414 //saving in my local machine in developing phrase
365 file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require); 415 file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require);
366 } 416 }
367 417
368 // ------ 418 // ------
407 curl_setopt($ch, CURLOPT_HTTPHEADER, array( 457 curl_setopt($ch, CURLOPT_HTTPHEADER, array(
408 'Content-type: multipart/form-data;charset=utf-8' 458 'Content-type: multipart/form-data;charset=utf-8'
409 )); 459 ));
410 460
411 // execute the request 461 // execute the request
412 $output = curl_exec($ch); 462 // **** commended to DEBUG ***
413 463 //$output = curl_exec($ch);
464 // *****
414 465
415 // output the profile information - includes the header 466 // output the profile information - includes the header
416 //echo($output) . PHP_EOL; 467 //echo($output) . PHP_EOL;
417 468
418 // close curl resource to free up system resources 469 // close curl resource to free up system resources
1145 1196
/**
 * Load an XML text file and split it into meta data and text content.
 *
 * Side effects on $this:
 *  - topic is set to the string value of <topic>;
 *  - taglist_infile is replaced by the <tagitem> rows
 *    (array(id, name, tag, color)) when the file contains at least one;
 *  - book_meta is replaced by the <book> rows
 *    (array(title, author, year, pagenumber)) when the file contains at least one.
 *
 * The script is terminated with die() when the file cannot be read or is
 * not well-formed XML.
 *
 * @param string $filename path or URL readable by file_get_contents()
 * @return string the <text_content> element serialized as XML (including its
 *                own tags), or "" when the file has no such element
 */
private function ParseMetaData($filename) {
    $text = file_get_contents($filename);
    $xml = ($text === false) ? false : simplexml_load_string($text);
    // Compare with false explicitly: a valid but empty SimpleXMLElement
    // evaluates to false in a boolean context.
    if ($xml === false) {
        die("Error: Cannot load from xml string");
    }

    $this->topic = (string)$xml->topic;    // set topic id

    // get taglist in file
    $taglistArray = array();
    foreach ($xml->tagitem as $row) {
        $taglistArray[] = array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color);
    }
    if ($taglistArray) {
        // keep the current list when the file carries no tag items
        $this->taglist_infile = $taglistArray;
    }

    // get book meta data
    $book_metaArray = array();
    foreach ($xml->book as $row) {
        $book_metaArray[] = array((string)$row->title, (string)$row->author, (string)$row->year, (string)$row->pagenumber);
    }
    if ($book_metaArray) {
        // keep the current meta data when the file carries no book entries
        $this->book_meta = $book_metaArray;
    }

    // Calling asXML() on a missing node only raises a warning and yields an
    // empty result, so return the empty string directly in that case.
    if (!isset($xml->text_content)) {
        return "";
    }
    $contentString = (string)$xml->text_content->asXML();

    return $contentString;
}
1171 private function GetSectionContent() { 1232 private function GetSectionContent() {
1172 $section_id = $this->GetSectionId(); 1233 $section_id = $this->GetSectionId();
1178 1239
1179 $contentString=""; 1240 $contentString="";
1180 $data_path = $this->GetDataPath(); 1241 $data_path = $this->GetDataPath();
1181 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { 1242 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) {
1182 $filename = $data_path."parsing_files/".$section_id.".txt"; 1243 $filename = $data_path."parsing_files/".$section_id.".txt";
1183 // --- parsing meta data 1244
1245 // --- parsing meta data
1184 $stringInput = $this->ParseMetaData($filename); 1246 $stringInput = $this->ParseMetaData($filename);
1185 // ---- 1247 // ----
1186 1248
1187 // if the text is from file system 1249 // if the text is from file system
1188 $stringInput = preg_replace("/ /u", "○", $stringInput); 1250 $stringInput = preg_replace("/ /u", "○", $stringInput);