comparison develop/models/extractapp.php @ 6:63e08b98032f

Rewrite the extraction interface into a PHP MVC architecture. (Although the JS hasn't been rewritten into MVC, it has been fitted into the current PHP MVC architecture.) - The root of the new PHP MVC is at 'develop/'. - The extraction interface is called "Extractapp" and has several actions, e.g. TaggingText, EditWordlist, EditTaglist, ExportTable.
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Thu, 05 Feb 2015 16:07:53 +0100
parents
children 584b1623e9ef
comparison
equal deleted inserted replaced
5:cbbb7ef22394 6:63e08b98032f
1
2
3 <?php
4
5 class ExtractappModel extends BaseModel{
6
/**
 * Default action: hands back a fixed list of placeholder strings.
 *
 * @return array three hard-coded values
 */
public function Index() {
    $placeholderValues = array("Index Value 1", "Value 2", "Value 3");

    return $placeholderValues;
}
10
11 protected $section_id, $data_path;
12
13
/**
 * Prepares the model for an action by recording the section id from the URL values.
 *
 * @param array $_urlvalues URL values, forwarded unchanged to SetSectionId()
 */
private function Initialize($_urlvalues) {
    // Whatever SetSectionId() returns is deliberately not propagated to the caller.
    $this->SetSectionId($_urlvalues);
}
17
18 // === for tagging ===
/**
 * Gathers everything the tagging action needs for the section named in the URL.
 *
 * @param array $urlvalues URL values, passed on to Initialize()
 * @return array with the keys 'stringInput', 'taglistArray', 'wordlistArray' and 'section_id'
 */
public function StartTagging($urlvalues) {
    $this->Initialize($urlvalues);

    // Fetched in this order: section id, section text, tag list, word lists.
    $sectionId = $this->GetSectionId();
    $sectionText = $this->GetSectionContent();
    $tags = $this->GetTaglistArray();
    $wordlists = $this->GetWordlistArray();

    return array(
        'stringInput'   => $sectionText,
        'taglistArray'  => $tags,
        'wordlistArray' => $wordlists,
        'section_id'    => $sectionId,
    );
}
41
/**
 * Saves the edited full text of a section to data/parsing_files/<filename>.txt.
 *
 * When that file already exists, its current content is first copied to
 * data/parsing_files/<filename>_<timestamp>.txt. Before saving, page-link anchors are reduced
 * to their label, "&amp;" becomes "&", the "○" placeholder becomes a space and "<br>" becomes
 * a newline.
 *
 * @param array $postdata expects 'text' (the edited markup) and 'filename' (name without extension)
 */
public function SaveFullText($postdata) {
    // empty()/isset() also cover missing keys, which previously raised undefined-index notices.
    if (empty($postdata['text']) || !isset($postdata['filename'])) {
        return;
    }

    // 'filename' is request data: keep only its last path component so that values such as
    // "../../x" cannot address files outside data/parsing_files/.
    $name = basename($postdata['filename']);
    if ($name === '') {
        return;
    }

    $target = "data/parsing_files/".$name.".txt";

    if (file_exists($target)) {
        // Keep the previous version under a timestamped name before overwriting it.
        $date = date('Y_m_d_H_i_s', time());
        $oldFile = file_get_contents($target);
        file_put_contents("data/parsing_files/".$name."_".$date.".txt", $oldFile);
    }

    if (get_magic_quotes_gpc()) {
        $require = stripslashes($postdata['text']);
    } else {
        $require = $postdata['text'];
    }

    // Patterns are applied in order; the "<br>" rule used to be listed twice, the second pass
    // could never match anything and has been dropped.
    $patterns = array(
        "/【<a(.*?)>(.*?)<\/a>】/u",
        '/&amp;/u',
        "/○/u",
        "/<br>/u",
    );
    $replacements = array(
        "【\\2】",
        "&",
        " ",
        "\n",
    );
    $require = preg_replace($patterns, $replacements, $require);

    file_put_contents($target, $require);
}
65
66
67
68 // === for export table ===
/**
 * Builds the export table for the section named in the URL from the tagged markup.
 *
 * The markup in $postdata['content'] is split on "<br>". For every line, the text found inside
 * each known tag is collected (several values for the same tag are joined with ";"), the
 * remaining text is kept as "other", the most recent page marker as "page" and the
 * HTML-escaped line as "full". Row 0 holds the column captions. At the end every row without
 * a "person" entry under key 0 is removed; this also removes row 0 unless a tag named
 * "person" is in the tag list.
 *
 * @param array $urlvalues URL values, passed on to Initialize()
 * @param array $postdata  expects 'content' (the tagged markup)
 * @return array with the keys 'outputTableArray', 'bookId', 'section_id', 'bookName', 'sectionName'
 */
public function ExportTable($urlvalues, $postdata) {
    $this->Initialize($urlvalues);
    // A missing 'content' entry is treated as empty markup instead of raising a notice.
    $content = isset($postdata['content']) ? $postdata['content'] : "";

    $section_id = $this->GetSectionId();

    $section_info = $this->GetSectionInfo();
    $sectionName = $section_info['sectionName'];
    $bookId = $section_info['bookId'];

    $books_info = $this->GetBooksInfo($bookId);
    $bookName = $books_info['bookName'];

    $taglistArray = $this->GetTaglistArray();
    if (!is_array($taglistArray)) {
        // Guard against a non-array result (e.g. an empty tag list) so the loops below can run.
        $taglistArray = array();
    }

    // Tag names end up inside regular expressions; escape them once so that a name containing
    // a regex metacharacter or the "/" delimiter cannot break or alter the patterns.
    $quotedTags = array();
    foreach ( $taglistArray as $tagIndex => $tagValue ) {
        $quotedTags[$tagIndex] = preg_quote($tagValue[2], "/");
    }

    // Row 0: column captions (tag => name, tag => name."(Title)") plus the three fixed columns.
    $outputTableArray = array();
    $outputTableArray[0] = array();
    $outputTableArray[0][0] = array();
    $outputTableArray[0][1] = array();
    foreach ( $taglistArray as $tagValue ) {
        $outputTableArray[0][0][$tagValue[2]] = $tagValue[1];
        $outputTableArray[0][1][$tagValue[2]] = $tagValue[1]."(Title)";
    }
    $outputTableArray[0]["other"] = "其他";
    $outputTableArray[0]["page"] = "頁數";
    $outputTableArray[0]["full"] = "全文";

    // Merge directly adjacent elements of the same tag and drop elements holding only spaces.
    foreach ( $taglistArray as $tagIndex => $tagValue ) {
        $tag = $quotedTags[$tagIndex];
        $content = preg_replace("/<\/".$tag.">○*<".$tag.">/u", "", $content);
        $content = preg_replace("/<".$tag.">[ ]*<\/".$tag.">/u", "", $content);
    }

    $contentLineArray = explode( "<br>", $content );

    $count = 0;      // incremented before use, so data rows start at index 1
    $pageNow = NULL; // last page marker seen; carried over to following lines
    foreach ( $contentLineArray as $value ) {
        $count++;
        $recordString = $value;
        $otherString = $recordString;

        if ( preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $recordString, $matches) ) {
            $pageNow = $matches[2];
        }

        foreach ( $taglistArray as $tagIndex => $tagValue ) {
            $tagName = $tagValue[2];
            $tagPattern = "/<".$quotedTags[$tagIndex].">(.*?)<\/".$quotedTags[$tagIndex].">/u";

            if ( !preg_match_all($tagPattern, $recordString, $matches, PREG_SET_ORDER) ) {
                continue;
            }

            foreach ( $matches as $matchesValue ) {
                $taggedText = preg_replace("/○/u", "", $matchesValue[1]);

                // If the tagged text contains 〈...〉 groups, each group is a separate value;
                // otherwise the whole tagged text is the single value.
                if ( preg_match_all("/〈(.*?)〉/u", $taggedText, $matches2, PREG_SET_ORDER) ) {
                    $cellValues = array();
                    foreach ( $matches2 as $matches2Value ) {
                        $cellValues[] = $matches2Value[1];
                    }
                } else {
                    $cellValues = array($taggedText);
                }

                foreach ( $cellValues as $cellValue ) {
                    if ( isset($outputTableArray[$count][0][$tagName]) ) {
                        $outputTableArray[$count][0][$tagName] .= ";".$cellValue;
                    } else {
                        $outputTableArray[$count][0][$tagName] = $cellValue;
                    }
                }
            }

            // Whatever was tagged does not belong to the "other" column.
            $otherString = preg_replace($tagPattern, " ", $otherString);
        }

        $otherString = preg_replace("/○/u", "", $otherString);
        $outputTableArray[$count]["other"] = $otherString;
        $outputTableArray[$count]["page"] = $pageNow;
        $value = preg_replace("/>/u", "&gt;", $value);
        $value = preg_replace("/</u", "&lt;", $value);
        $outputTableArray[$count]["full"] = $value;
    }

    // Only rows that carry a "person" value are exported (the tag name is hard-coded here).
    foreach ( $outputTableArray as $arrayIndex => $arrayValue ) {
        if ( !isset($arrayValue[0]["person"]) ) {
            unset($outputTableArray[$arrayIndex]);
        }
    }

    $data = array();
    $data['outputTableArray'] = $outputTableArray;
    $data['bookId'] = $bookId;
    $data['section_id'] = $section_id;
    $data['bookName'] = $bookName;
    $data['sectionName'] = $sectionName;

    return $data;
}
166
167 // === for manage tag list ===
/**
 * Loads the tag list for the tag-management action.
 *
 * @param array $urlvalues URL values, passed on to Initialize()
 * @return array with the key 'taglistArray': tag id => array(name, tag, color)
 */
public function EditTaglist($urlvalues) {
    $this->Initialize($urlvalues);

    $tagsById = array();
    $rows = $this->GetTaglist();
    while ($tagRow = mysql_fetch_assoc($rows)) {
        $tagId = $tagRow['id'];
        $tagsById[$tagId] = array($tagRow['name'], $tagRow['tag'], $tagRow['color']);
    }

    return array('taglistArray' => $tagsById);
}
181
/**
 * Pulls the four tag fields out of the posted data, undoing magic-quotes escaping when active.
 *
 * @param array $_postdata expects the keys 'id', 'name', 'tag' and 'color'
 * @return array list of (id, name, tag, color), in that order
 */
private function _GetTag($_postdata) {
    $unescape = get_magic_quotes_gpc();

    $fields = array();
    foreach (array('id', 'name', 'tag', 'color') as $key) {
        $raw = $_postdata[$key];
        $fields[] = $unescape ? stripslashes($raw) : $raw;
    }

    return $fields;
}
196
/**
 * Inserts a new row into `taglist` from the posted tag fields.
 *
 * Nothing happens when the posted 'id' is empty. The query result is not reported back.
 *
 * @param array $postdata expects 'id', 'name', 'tag' and 'color'
 */
public function NewTagElement($postdata) {
    if (!$postdata['id']) {
        return;
    }

    list($id, $name, $tag, $color) = $this->_GetTag($postdata);

    // Each value is converted to its SQL form before being placed into the statement.
    $sqlValues = array(
        $this->GetSQLValueString($id, "int"),
        $this->GetSQLValueString($name, "text"),
        $this->GetSQLValueString($tag, "text"),
        $this->GetSQLValueString($color, "text"),
        $this->GetSQLValueString($this->systemNAME, "text"),
    );
    $insertSql = vsprintf("INSERT INTO `taglist` (`id`, `name`, `tag`, `color`, `systemName`) VALUES (%s, %s, %s, %s, %s)", $sqlValues);

    mysql_query($insertSql);
}
211
/**
 * Updates name, tag and color of an existing `taglist` row identified by the posted id.
 *
 * Nothing happens when the posted 'id' is empty. The query result is not reported back.
 *
 * @param array $postdata expects 'id', 'name', 'tag' and 'color'
 */
public function SaveTagElement($postdata) {
    if (!$postdata['id']) {
        return;
    }

    list($id, $name, $tag, $color) = $this->_GetTag($postdata);

    // Values in placeholder order: the three columns being set, then the row id.
    $sqlValues = array(
        $this->GetSQLValueString($name, "text"),
        $this->GetSQLValueString($tag, "text"),
        $this->GetSQLValueString($color, "text"),
        $this->GetSQLValueString($id, "int"),
    );
    $updateSql = vsprintf("UPDATE taglist SET `name`=%s, `tag`=%s, `color`=%s WHERE `id`=%s", $sqlValues);

    mysql_query($updateSql);
}
226
/**
 * Deletes the `taglist` row whose id was posted.
 *
 * Nothing happens when the posted 'id' is missing or empty. The query result is not
 * reported back.
 *
 * @param array $postdata expects 'id' (numeric tag id)
 */
public function DeleteTag($postdata) {
    // empty() also covers a missing key, which previously raised an undefined-index notice.
    if (empty($postdata['id'])) {
        return;
    }

    // The id is request data and used to be placed into the statement verbatim, which allowed
    // SQL injection. Forcing it to an integer makes the statement safe.
    $deleteSql = sprintf("DELETE FROM `taglist` WHERE `id` = %d", (int) $postdata['id']);

    mysql_query($deleteSql);
}
234 // ===========================
235
236 // === for manage wordlist ===
/**
 * Loads the word lists for the word-list management action.
 *
 * @param array $urlvalues URL values, passed on to Initialize()
 * @return array with the key 'wordlistArray': word-list id => name
 */
public function EditWordlist($urlvalues) {
    $this->Initialize($urlvalues);

    $namesById = array();
    $rows = $this->GetWordlist();
    while ($listRow = mysql_fetch_assoc($rows)) {
        $listId = $listRow['id'];
        $namesById[$listId] = $listRow['name'];
    }

    return array('wordlistArray' => $namesById);
}
249
250
/**
 * Creates a new word list: inserts a `wordlist` row and creates its placeholder file
 * data/wordlist/<new id>.txt containing "(empty now)".
 *
 * Nothing happens when the posted 'text' is missing or empty.
 *
 * @param array $postdata expects 'text' (the name of the new word list)
 */
public function AddNewList($postdata) {
    // empty() also covers a missing key, which previously raised an undefined-index notice.
    if (empty($postdata['text'])) {
        return;
    }

    if (get_magic_quotes_gpc()) {
        $name = stripslashes($postdata['text']);
    } else {
        $name = $postdata['text'];
    }

    $insertSql = sprintf("INSERT INTO `wordlist` (`name`, `systemName`) VALUES (%s, %s)",
        $this->GetSQLValueString($name, "text"),
        $this->GetSQLValueString($this->systemNAME, "text"));

    if (!mysql_query($insertSql)) {
        // The row was not inserted, so mysql_insert_id() would not identify a new list;
        // creating a file from it would leave a stray or wrongly numbered file behind.
        return;
    }

    file_put_contents( "data/wordlist/".mysql_insert_id().".txt", "(empty now)");
}
266
267
/**
 * Saves the edited content of a word list to data/wordlist/<filename>.txt.
 *
 * When that file already exists, its current content is first copied to
 * data/wordlist/<filename>_<timestamp>.txt, then the live file is overwritten.
 *
 * NOTE(review): the previous implementation wrote the NEW text to the timestamped path when
 * the file already existed (overwriting the backup it had just made) and left the live file
 * unchanged, so edits never reached <filename>.txt. This version backs up first and then
 * overwrites the live file, the same way SaveFullText() does — confirm this is the intended
 * behaviour.
 *
 * @param array $postdata expects 'text' (the word-list content) and 'filename' (name without extension)
 */
public function SaveWordlist($postdata) {
    // empty()/isset() also cover missing keys, which previously raised undefined-index notices.
    if (empty($postdata['text']) || !isset($postdata['filename'])) {
        return;
    }

    // 'filename' is request data: keep only its last path component so that values such as
    // "../../x" cannot address files outside data/wordlist/.
    $name = basename($postdata['filename']);
    if ($name === '') {
        return;
    }

    $target = "data/wordlist/".$name.".txt";

    if ( file_exists($target) ) {
        // Keep the previous version under a timestamped name before overwriting it.
        $date = date('Y_m_d_H_i_s', time());
        $oldFile = file_get_contents($target);
        file_put_contents("data/wordlist/".$name."_".$date.".txt", $oldFile);
    }

    if (get_magic_quotes_gpc()) {
        $require = stripslashes($postdata['text']);
    } else {
        $require = $postdata['text'];
    }

    // The text is stored as posted; the former replacement of "<br>" by "<br>" changed nothing
    // and has been removed.
    file_put_contents($target, $require);
}
309
310
311 // =======================================
312
/**
 * Comparison callback that orders strings by byte length, longest first.
 *
 * @param string $a
 * @param string $b
 * @return int negative when $a is longer than $b, positive when shorter, 0 when equal
 */
public function sortFunction($a,$b) {
    $lengthA = strlen($a);
    $lengthB = strlen($b);

    return $lengthB - $lengthA;
}
316
317
/**
 * Stores the section id taken from the 'id' entry of the URL values.
 *
 * @param array $_urlvalues URL values; 'id' is the section id
 * @return string|null JSON-encoded error message when 'id' is empty, otherwise nothing
 */
private function SetSectionId($_urlvalues) {
    // TODO: maybe get user info also
    // Guard clause: without an id there is nothing to store.
    if ($_urlvalues['id'] == "") {
        return json_encode("Error: No section id");
    }

    $this->section_id = $_urlvalues['id'];
}
335
/**
 * Returns the stored section id.
 *
 * @return mixed the section id, or a JSON-encoded error message when none is set
 */
private function GetSectionId() {
    return $this->section_id ? $this->section_id : json_encode("Error: No section id");
}
343
/**
 * Looks up book id, page range and name of the current section.
 *
 * @return array|string array with the keys 'bookId', 'startPage', 'endPage' and 'sectionName'
 *                      (all NULL when no row is found); when the section id is not numeric,
 *                      the value of GetSectionId() is returned unchanged instead
 */
private function GetSectionInfo() {
    $section_id = $this->GetSectionId();
    if (!is_numeric($section_id)) {
        return $section_id;
    }

    // Defaults make the "no matching row" case well defined; the values used to come from
    // variables that were never assigned in that case.
    $data = array(
        'bookId'      => null,
        'startPage'   => null,
        'endPage'     => null,
        'sectionName' => null,
    );

    $result = $this->GetSectionsByID($section_id);

    // If several rows come back, the last one wins (as before).
    while ($row = mysql_fetch_assoc($result)) {
        $data['bookId'] = $row['books_id'];
        $data['startPage'] = $row['start_page'];
        $data['endPage'] = $row['end_page'];
        $data['sectionName'] = $row['name'];
    }

    return $data;
}
368
/**
 * Produces the markup of the current section for the tagging editor.
 *
 * If <data path>parsing_files/<section id>.txt exists, its content is used. Otherwise the
 * lines of the section's page range are read from the `contents` table, each prefixed with a
 * 【line】 marker, and any <...> markup in them is replaced by "○". In both cases spaces
 * become "○", newlines become "<br>" and every 【...】 marker is turned into a link to the
 * page image.
 *
 * @return string the prepared markup
 */
private function GetSectionContent() {
    $section_id = $this->GetSectionId();
    $section_info = $this->GetSectionInfo();

    $bookId = $section_info['bookId'];
    $startPage = $section_info['startPage'];
    // Fixed: the end page was read from $section_id['endPage'] (an offset into the id itself),
    // so the upper bound of the query below was wrong.
    $endPage = $section_info['endPage'];

    $savedFile = $this->GetDataPath()."parsing_files/".$section_id.".txt";

    if ( file_exists($savedFile) ) {
        $stringInput = file_get_contents($savedFile);
    } else {
        $contentString = "";
        $query = sprintf("SELECT `content`, `line`, `books_id` FROM `contents` WHERE `books_id`=\"%s\" AND `line`>=%d AND `line`<=%d", $bookId, $startPage, $endPage);
        $result = mysql_query($query);
        if ($result) {
            // A failed query yields empty content instead of warnings from the fetch call.
            while ($row = mysql_fetch_assoc($result)) {
                $contentString .= "【".$row['line']."】".$row['content']."\n";
            }
        }
        // Only database content has its <...> markup replaced; saved files are used as they are.
        $stringInput = preg_replace("/<(.*?)>/u", "○", $contentString);
    }

    // Post-processing shared by both sources.
    $stringInput = preg_replace("/ /u", "○", $stringInput);
    $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
    $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);

    return $stringInput;
}
401
/**
 * Returns the data directory: the current working directory followed by "/data/".
 *
 * @return string path ending in a slash
 */
private function GetDataPath() {
    $workingDirectory = getcwd();

    return $workingDirectory."/data/";
}
405
/**
 * Returns all tags as a list of rows.
 *
 * @return array list of array(id, name, tag, color); empty array when there are no tags
 */
private function GetTaglistArray() {
    // Start from an array, not "": appending with [] to a string is not supported by newer
    // PHP versions, and callers that iterate the result need an array even when it is empty.
    $taglistArray = array();

    $result = $this->GetTaglist();
    while ($row = mysql_fetch_assoc($result)) {
        $taglistArray[] = array( $row['id'], $row['name'], $row['tag'], $row['color'] );
    }

    return $taglistArray;
}
417
/**
 * Returns every word list together with an alternation pattern built from its words.
 *
 * For each list, the file <data path>wordlist/<id>.txt is read, <div>/<span> markup is removed
 * (an opening <div> becomes a newline), the words are ordered longest first, "○?" is inserted
 * between the characters of every word, empty entries are dropped and the rest is joined
 * with "|".
 *
 * @return array list of array(id, name, pattern); empty array when there are no word lists
 */
private function GetWordlistArray() {
    // Start from an array, not "": appending with [] to a string is not supported by newer
    // PHP versions, and an empty result should still be iterable.
    $wordlistArray = array();

    $result = $this->GetWordlist();
    while ($row = mysql_fetch_assoc($result)) {
        // Word-list files are written to data/wordlist/ by the save/add actions; the bare
        // "wordlist/" path used before did not match that location.
        $listFile = $this->GetDataPath()."wordlist/".$row['id'].".txt";
        $listString = file_exists($listFile) ? file_get_contents($listFile) : "";

        $listString = preg_replace("/<div>/u", "\n", $listString);
        $listString = preg_replace("/<\/div>/u", "", $listString);
        $listString = preg_replace("/<span(.*?)>/u", "", $listString);
        $listString = preg_replace("/<\/span>/u", "", $listString);

        $words = explode( "\n", $listString );

        // sortFunction is a method of this class, so it has to be passed as an object callback;
        // the plain string 'sortFunction' names a global function that does not exist here.
        usort($words, array($this, 'sortFunction'));

        $patterns = array();
        foreach ( $words as $word ) {
            // Split between characters and allow an optional "○" between any two of them.
            $pattern = implode("○?", preg_split("/(?<!^)(?!$)/u", $word));
            if ($pattern == "") {
                continue;
            }
            $patterns[] = $pattern;
        }
        $listString = implode("|", $patterns);

        // Check for emptiness first: reading offset 0 of an empty string raises a notice.
        if ( $listString !== "" && $listString[0]=="|" ) {
            $listString = substr($listString, 1);
        }

        $wordlistArray[] = array( $row['id'], $row['name'], $listString );
    }

    return $wordlistArray;
}
448
/**
 * Looks up the name belonging to the given book id.
 *
 * NOTE(review): the lookup goes through GetSectionsByID() although a book id is passed in —
 * presumably a books lookup was intended; verify against the base model.
 *
 * @param mixed $bookId id handed to GetSectionsByID()
 * @return array with the key 'bookName' (NULL when no row is found)
 */
protected function GetBooksInfo($bookId) {
    // Default for the "no matching row" case; the variable used to stay undefined then.
    $bookName = null;

    $result = $this->GetSectionsByID($bookId);
    // If several rows come back, the last one wins (as before).
    while ($row = mysql_fetch_assoc($result)) {
        $bookName = $row['name'];
    }

    $data = array();
    $data['bookName'] = $bookName;

    return $data;
}
460
461
462 }
463
464
465
466 ?>