Mercurial > hg > extraction-interface
comparison develop/models/extractapp.php @ 40:2e938dc046db extractapp
load,save xml file with topic, etc.
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 10 Mar 2015 13:46:16 +0100 |
parents | 8347776a44fc |
children | 533a6c39c128 |
comparison
equal
deleted
inserted
replaced
39:8347776a44fc | 40:2e938dc046db |
---|---|
16 $this->SetSectionId($_urlvalues); | 16 $this->SetSectionId($_urlvalues); |
17 | 17 |
18 } | 18 } |
19 | 19 |
20 public function GetTextFromFileId($_postdata) { | 20 public function GetTextFromFileId($_postdata) { |
21 /* | |
22 if ($this->current_fileId != 0) { | |
23 $this->file_id = $this->current_fileId; | |
24 } else { | |
25 $this->file_id = $_postdata['fileId']; | |
26 } | |
27 */ | |
28 $this->file_id = $_postdata['fileId']; | 21 $this->file_id = $_postdata['fileId']; |
29 $branch_id = $_postdata['branchId']; | 22 $branch_id = $_postdata['branchId']; |
30 $section_id = $_postdata['sectionId']; | 23 $section_id = $_postdata['sectionId']; |
31 | 24 |
32 $this->branch_id = $branch_id; | 25 $this->branch_id = $branch_id; |
33 $this->user_id = $_postdata['userId']; | 26 $this->user_id = $_postdata['userId']; |
34 $this->section_id = $section_id; | 27 $this->section_id = $section_id; |
35 | 28 |
36 | |
37 $this->section_name = $_postdata['sectionName']; | 29 $this->section_name = $_postdata['sectionName']; |
38 $this->book_id = $_postdata['bookId']; | 30 $this->book_id = $_postdata['bookId']; |
39 $this->book_name = $_postdata['bookName']; | 31 $this->book_name = $_postdata['bookName']; |
40 | 32 |
41 | |
42 // get from URL with file_id | 33 // get from URL with file_id |
43 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; | 34 $lg_text_url = $this->get_text_from_fileId_url.$this->file_id; |
44 $lg_text = file_get_contents($lg_text_url); | 35 //$lg_text = file_get_contents($lg_text_url); |
36 // --- parsing meta data | |
37 $lg_text = $this->ParseMetaData($lg_text_url); | |
38 | |
39 // ---- | |
40 | |
45 | 41 |
46 $stringInput = $lg_text; | 42 $stringInput = $lg_text; |
47 $stringInput = preg_replace("/ /u", "○", $stringInput); | 43 $stringInput = preg_replace("/ /u", "○", $stringInput); |
48 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); | 44 $stringInput = preg_replace("/\n/u", "<br>", $stringInput); |
49 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); | 45 $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput); |
52 $this->lg_text = $lg_text; | 48 $this->lg_text = $lg_text; |
53 | 49 |
54 } | 50 } |
55 | 51 |
56 public function GetTextFromSectionId($_postdata) { | 52 public function GetTextFromSectionId($_postdata) { |
57 | |
58 $section_id = $_postdata['sectionId']; | 53 $section_id = $_postdata['sectionId']; |
59 $this->section_id = $section_id; | 54 $this->section_id = $section_id; |
60 $this->user_id = $_postdata['userId']; | 55 $this->user_id = $_postdata['userId']; |
61 | 56 |
62 $this->section_name = $_postdata['sectionName']; | 57 $this->section_name = $_postdata['sectionName']; |
63 $this->book_id = $_postdata['bookId']; | 58 $this->book_id = $_postdata['bookId']; |
64 $this->book_name = $_postdata['bookName']; | 59 $this->book_name = $_postdata['bookName']; |
65 | 60 |
66 // get from URL with file_id | 61 // get from URL with file_id |
67 $lg_text_url = $this->get_text_from_sectionId_url.$section_id; | 62 $lg_text_url = $this->get_text_from_sectionId_url.$section_id; |
68 | |
69 $lg_text = file_get_contents($lg_text_url); | 63 $lg_text = file_get_contents($lg_text_url); |
70 | 64 |
71 $stringInput = $lg_text; | 65 $stringInput = $lg_text; |
72 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); | 66 $stringInput = preg_replace("/<(.*?)>/u", "○", $stringInput); |
73 $stringInput = preg_replace("/ /u", "○", $stringInput); | 67 $stringInput = preg_replace("/ /u", "○", $stringInput); |
116 $this->book_id = $_postdata['bookId']; | 110 $this->book_id = $_postdata['bookId']; |
117 } | 111 } |
118 if ($_postdata['currentFileId']) { | 112 if ($_postdata['currentFileId']) { |
119 $this->current_fileId = $_postdata['currentFileId']; | 113 $this->current_fileId = $_postdata['currentFileId']; |
120 } | 114 } |
121 | 115 if ($_postdata['taglistArray']) { |
122 | 116 $this->taglist_infile = json_decode($_postdata['taglistArray']); |
117 } | |
118 if ($_postdata['book_meta']) { | |
119 $this->book_meta = json_decode($_postdata['book_meta']); | |
120 } | |
123 | 121 |
124 } | 122 } |
125 public function InitData($_postdata) { | 123 public function InitData($_postdata) { |
126 $file_id = $_postdata['fileId']; | 124 $file_id = $_postdata['fileId']; |
127 $branch_id = $_postdata['branchId']; | 125 $branch_id = $_postdata['branchId']; |
192 | 190 |
193 } | 191 } |
194 */ | 192 */ |
195 | 193 |
196 | 194 |
195 // TODO: comparison not correct | |
197 private function Taglist_infileUpToDate($taglistArray) { | 196 private function Taglist_infileUpToDate($taglistArray) { |
198 // compare $this->taglist_infile is the same as $taglistArray | 197 // compare $this->taglist_infile is the same as $taglistArray |
199 $taglist_infile = $this->taglist_infile; | 198 $taglist_infile = $this->taglist_infile; |
200 if (count($taglist_infile) != count($taglistArray)) { | 199 if (count($taglist_infile) != count($taglistArray)) { |
201 return false; | 200 return false; |
202 } | 201 } |
203 | 202 |
204 foreach ($taglistArray as $row_indb) { | 203 foreach ($taglistArray as $row_indb) { |
204 $cnt = 0; | |
205 foreach ($taglist_infile as $row) { | 205 foreach ($taglist_infile as $row) { |
206 //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) | 206 //$taglistArray: array( $row['id'], $row['name'], $row['tag'], $row['color'] ) |
207 if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { | 207 if ($row[0] == $row_indb[0] && $row[1] == $row_indb[1] && $row[2] == $row_indb[2] && $row[3] == $row_indb[3]) { |
208 | 208 |
209 } else { | 209 } else { |
210 return false; | 210 $cnt ++; |
211 } | 211 } |
212 } | |
213 if ($cnt == count($row)) { | |
214 return false; | |
212 } | 215 } |
213 } | 216 } |
214 return true; | 217 return true; |
215 } | 218 } |
216 // === for tagging === | 219 // === for tagging === |
223 | 226 |
224 //$taglistArray = $this->GetTaglistArray(); | 227 //$taglistArray = $this->GetTaglistArray(); |
225 //for GetTaglistByTopicID: | 228 //for GetTaglistByTopicID: |
226 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); | 229 $taglistArray = $this->GetTaglistByTopicID($this->GetTopic()); |
227 | 230 |
231 $data['taglist_infile'] = $this->taglist_infile; | |
228 // TODO: check if taglist_infile is up-to-date | 232 // TODO: check if taglist_infile is up-to-date |
233 /* | |
229 if ( !$this->Taglist_infileUpToDate($taglistArray)) { | 234 if ( !$this->Taglist_infileUpToDate($taglistArray)) { |
230 $data['taglist_infile'] = $this->taglist_infile; | 235 $data['taglist_infile'] = $this->taglist_infile; |
231 } | 236 } else { |
232 | 237 $data['taglist_infile'] = ""; |
238 } | |
239 */ | |
240 | |
241 // book_meta | |
242 $data['book_meta'] = $this->book_meta; | |
233 | 243 |
234 // topic list | 244 // topic list |
235 $topiclistArray = $this->GetTopiclistArray(); | 245 $topiclistArray = $this->GetTopiclistArray(); |
236 | 246 |
237 | 247 |
336 | 346 |
337 } | 347 } |
338 | 348 |
339 | 349 |
340 } | 350 } |
351 private function AppendMetaData($text_content) { | |
352 $text = '<?xml version="1.0" encoding="UTF-8"?>'; | |
353 $text .= "\n<text>\n"; | |
354 // topic | |
355 $text .= "<topic>".$this->topic."</topic>\n"; | |
356 // book meta data | |
357 $book = $this->book_meta; | |
358 foreach ($book as $b) { | |
359 $text .= "<book>\n"; | |
360 $text .= "<title>".$b[0]."</title>\n"; | |
361 $text .= "<author>".$b[1]."</author>\n"; | |
362 $text .= "<year>".$b[2]."</year>\n"; | |
363 $text .= "<pagenumber>".$b[3]."</pagenumber>\n"; | |
364 $text .= "</book>\n"; | |
365 } | |
366 // taglist | |
367 $taglist = $this->taglist_infile; | |
368 foreach ($taglist as $tagitem) { | |
369 $text .= "<tagitem>\n"; | |
370 $text .= "<id>".$tagitem[0]."</id>\n"; | |
371 $text .= "<name>".$tagitem[1]."</name>\n"; | |
372 $text .= "<tag>".$tagitem[2]."</tag>\n"; | |
373 $text .= "<color>".$tagitem[3]."</color>\n"; | |
374 $text .= "</tagitem>\n"; | |
375 } | |
376 | |
377 // text_content | |
378 $text .= $text_content; | |
379 $text .= "\n</text>"; | |
380 | |
381 return $text; | |
382 } | |
341 public function SaveFullTextToLGService($_postdata) { | 383 public function SaveFullTextToLGService($_postdata) { |
342 // save tagged text (full text) by Jorge's API to lg service | 384 // save tagged text (full text) by Jorge's API to lg service |
343 | 385 |
344 // -------- | 386 // -------- |
345 if ($_postdata['text']){ | 387 if ($_postdata['text']){ |
353 if (get_magic_quotes_gpc()) { | 395 if (get_magic_quotes_gpc()) { |
354 $require = stripslashes($_postdata['text']); | 396 $require = stripslashes($_postdata['text']); |
355 } else { | 397 } else { |
356 $require = $_postdata['text']; | 398 $require = $_postdata['text']; |
357 } | 399 } |
358 | 400 |
359 $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require); | 401 $require = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $require); |
360 $require = preg_replace('/&/u', "&", $require); | 402 $require = preg_replace('/&/u', "&", $require); |
361 $require = preg_replace("/○/u", " ", $require); | 403 $require = preg_replace("/○/u", " ", $require); |
362 $require = preg_replace("/<br>/u", "\n", $require); | 404 $require = preg_replace("/<br>/u", "\n", $require); |
363 $require = preg_replace("/<br>/u", "\n", $require); | 405 //$require = preg_replace("/<br>/u", "\n", $require); |
406 | |
407 // TODO: append metadata at the beginning of file | |
408 if ($_postdata['branchId'] == 0) { | |
409 // -- new branch case | |
410 $require = "<text_content>\n".$require."</text_content>\n"; | |
411 } | |
412 $require = $this->AppendMetaData($require); | |
413 | |
364 //saving in my local machine in developing phrase | 414 //saving in my local machine in developing phrase |
365 file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require); | 415 file_put_contents("data/parsing_files/".$_postdata['sectionId'].".txt", $require); |
366 } | 416 } |
367 | 417 |
368 // ------ | 418 // ------ |
407 curl_setopt($ch, CURLOPT_HTTPHEADER, array( | 457 curl_setopt($ch, CURLOPT_HTTPHEADER, array( |
408 'Content-type: multipart/form-data;charset=utf-8' | 458 'Content-type: multipart/form-data;charset=utf-8' |
409 )); | 459 )); |
410 | 460 |
411 // execute the request | 461 // execute the request |
412 $output = curl_exec($ch); | 462 // **** commended to DEBUG *** |
413 | 463 //$output = curl_exec($ch); |
464 // ***** | |
414 | 465 |
415 // output the profile information - includes the header | 466 // output the profile information - includes the header |
416 //echo($output) . PHP_EOL; | 467 //echo($output) . PHP_EOL; |
417 | 468 |
418 // close curl resource to free up system resources | 469 // close curl resource to free up system resources |
1145 | 1196 |
1146 private function ParseMetaData($filename) { | 1197 private function ParseMetaData($filename) { |
1147 $text = file_get_contents($filename); | 1198 $text = file_get_contents($filename); |
1148 $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); | 1199 $xml = simplexml_load_string($text) or die("Error: Cannot load from xml string"); |
1149 | 1200 |
1150 $this->topic = $xml->topic; // set topic id | 1201 $this->topic = (string)$xml->topic; // set topic id |
1151 | 1202 |
1152 // get taglist in file | 1203 // get taglist in file |
1153 $taglist_infile = $xml->taglist; | 1204 $taglist_infile = $xml->tagitem; |
1154 $taglistArray = array(); | 1205 $taglistArray = array(); |
1155 foreach ($taglist_infile as $row) { | 1206 foreach ($taglist_infile as $row) { |
1156 array_push($taglistArray, array((string)$row->id, (string)$row->name, (string)$row->tag, (string)$row->color)); | 1207 array_push($taglistArray, array((string)$row->id,(string)$row->name,(string)$row->tag,(string)$row->color )); |
1157 } | 1208 } |
1158 $this->taglist_infile = $taglistArray; | 1209 if ($taglistArray) { |
1159 | 1210 $this->taglist_infile = $taglistArray; |
1160 // TODO: get meta data of book | 1211 } |
1161 $this->book_meta = $xml->book; | 1212 |
1213 // get book meta data | |
1214 $book_meta = $xml->book; | |
1215 $book_metaArray = array(); | |
1216 foreach ($book_meta as $row) { | |
1217 //array_push($book_metaArray, array('title'=>(string)$row->title,'author'=>(string)$row->author,'year'=>(string)$row->year,'pagenumber'=>(string)$row->pagenumber )); | |
1218 array_push($book_metaArray, array((string)$row->title,(string)$row->author,(string)$row->year,(string)$row->pagenumber )); | |
1219 } | |
1220 if ($book_metaArray) { | |
1221 $this->book_meta = $book_metaArray; | |
1222 } | |
1162 | 1223 |
1163 // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; | 1224 // echo $taglist->name.", ".$taglist->tag."," .$taglist->color; |
1164 // --- detect if the taglist set is up-to-date or not --- | 1225 // --- detect if the taglist set is up-to-date or not --- |
1165 | 1226 $contentString = (string)$xml->text_content->asXML(); |
1166 | 1227 //$removed_str = array("<text_content>","</text_content>"); |
1167 $contentString = $xml->text_content->asXML(); | 1228 //$new_contentString = str_replace($removed_str, "", $contentString); |
1168 | 1229 |
1169 return $contentString; | 1230 return $contentString; |
1170 } | 1231 } |
1171 private function GetSectionContent() { | 1232 private function GetSectionContent() { |
1172 $section_id = $this->GetSectionId(); | 1233 $section_id = $this->GetSectionId(); |
1178 | 1239 |
1179 $contentString=""; | 1240 $contentString=""; |
1180 $data_path = $this->GetDataPath(); | 1241 $data_path = $this->GetDataPath(); |
1181 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { | 1242 if ( file_exists($data_path."parsing_files/".$section_id.".txt") ) { |
1182 $filename = $data_path."parsing_files/".$section_id.".txt"; | 1243 $filename = $data_path."parsing_files/".$section_id.".txt"; |
1183 // --- parsing meta data | 1244 |
1245 // --- parsing meta data | |
1184 $stringInput = $this->ParseMetaData($filename); | 1246 $stringInput = $this->ParseMetaData($filename); |
1185 // ---- | 1247 // ---- |
1186 | 1248 |
1187 // if the text is from file system | 1249 // if the text is from file system |
1188 $stringInput = preg_replace("/ /u", "○", $stringInput); | 1250 $stringInput = preg_replace("/ /u", "○", $stringInput); |