Mercurial > hg > extraction-interface
comparison develop/models/extractapp.php @ 6:63e08b98032f
rewrite extraction interface into PHP MVC architecture.
(Although js hasn't been rewritten into MVC, it's fitted into the current PHP MVC architecture.)
- The root of the new PHP MVC is at 'develop/'.
- The extraction interface is called "Extractapp" and has several actions, e.g. TaggingText, EditWordlist, EditTaglist, ExportTable.
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Thu, 05 Feb 2015 16:07:53 +0100 |
parents | |
children | 584b1623e9ef |
comparison
equal
deleted
inserted
replaced
5:cbbb7ef22394 | 6:63e08b98032f |
---|---|
1 | |
2 | |
3 <?php | |
4 | |
5 class ExtractappModel extends BaseModel{ | |
6 | |
/**
 * Default action: hands back three fixed placeholder strings.
 *
 * @return array List of three strings.
 */
public function Index() {
    $values = array();
    $values[] = "Index Value 1";
    $values[] = "Value 2";
    $values[] = "Value 3";

    return $values;
}
10 | |
/** Section id taken from the URL values by SetSectionId(). */
protected $section_id;

/** Declared here; not assigned anywhere in the code shown in this file. */
protected $data_path;

/**
 * Prepare the model for one request by recording the section id.
 *
 * @param array $_urlvalues URL values of the current request; SetSectionId() reads its 'id' entry.
 * @return void
 */
private function Initialize($_urlvalues) {
    // Whatever SetSectionId() returns (an error string when the id is empty) is not used here.
    $this->SetSectionId($_urlvalues);
}
17 | |
18 // === for tagging === | |
/**
 * Collect everything the tagging page needs for one section.
 *
 * @param array $urlvalues URL values of the current request (the section id is read from it).
 * @return array Keys: 'stringInput' (section text as HTML), 'taglistArray',
 *               'wordlistArray' and 'section_id'.
 */
public function StartTagging($urlvalues) {
    $this->Initialize($urlvalues);

    // Resolved first so the helpers are called in the same order as before.
    $currentSection = $this->GetSectionId();

    return array(
        'stringInput'   => $this->GetSectionContent(),
        'taglistArray'  => $this->GetTaglistArray(),
        'wordlistArray' => $this->GetWordlistArray(),
        'section_id'    => $currentSection,
    );
}
41 | |
/**
 * Persist the tagged full text posted from the tagging page.
 *
 * If data/parsing_files/<filename>.txt already exists it is first copied to a
 * timestamped backup (<filename>_Y_m_d_H_i_s.txt). The posted HTML is then
 * converted back to plain text and written to <filename>.txt.
 *
 * @param array $postdata POST data; uses 'text' (HTML from the editor) and 'filename'.
 * @return void Nothing is written when 'text' is empty or 'filename' is missing.
 */
public function SaveFullText($postdata) {
    if (empty($postdata['text']) || !isset($postdata['filename'])) {
        return;
    }

    // 'filename' comes straight from the request: keep only its last path
    // component so it cannot address anything outside data/parsing_files/.
    $name = basename((string) $postdata['filename']);
    if ($name === "") {
        return;
    }
    $target = "data/parsing_files/".$name.".txt";

    // Keep the previous version under a timestamped name before overwriting.
    if (file_exists($target)) {
        $stamp = date('Y_m_d_H_i_s', time());
        copy($target, "data/parsing_files/".$name."_".$stamp.".txt");
    }

    $text = get_magic_quotes_gpc() ? stripslashes($postdata['text']) : $postdata['text'];

    // Page markers: drop the image link, keep the page label inside the brackets.
    $text = preg_replace("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】", $text);
    // NOTE(review): the reviewed copy replaced "&" with "&" (a no-op) and ran
    // the "<br>" replacement twice. These look like HTML entities that were
    // decoded in transit, so entity decoding is restored here - confirm
    // against the repository file.
    $text = preg_replace('/&amp;/u', "&", $text);
    // The editor shows spaces as "○" and line breaks as "<br>".
    $text = preg_replace("/○/u", " ", $text);
    $text = preg_replace("/<br>/u", "\n", $text);
    $text = preg_replace("/&lt;br&gt;/u", "\n", $text);

    file_put_contents($target, $text);
}
65 | |
66 | |
67 | |
68 // === for export table === | |
/**
 * Turn the tagged section HTML into a table: one row per "<br>"-separated line.
 *
 * Row 0 is the header: [0][0][tag] holds the tag's display name and
 * [0][1][tag] the same name suffixed with "(Title)". For every content line
 * n (1-based) the row holds [n][0][tag] (tagged values joined with ";"),
 * 'other' (the line with tagged spans removed), 'page' (label of the most
 * recent page marker) and 'full' (the line with "<" and ">" escaped).
 * Rows that have no value for the "person" tag are dropped.
 *
 * @param array $urlvalues URL values of the current request (the section id is read from it).
 * @param array $postdata  POST data; 'content' is the tagged HTML.
 * @return array Keys: 'outputTableArray', 'bookId', 'section_id', 'bookName', 'sectionName'.
 */
public function ExportTable($urlvalues, $postdata) {
    $this->Initialize($urlvalues);
    $content = isset($postdata['content']) ? $postdata['content'] : "";

    $section_id = $this->GetSectionId();

    $section_info = $this->GetSectionInfo();
    $sectionName = $section_info['sectionName'];
    $bookId = $section_info['bookId'];

    $books_info = $this->GetBooksInfo($bookId);
    $bookName = $books_info['bookName'];

    // Each entry is array(id, name, tag, color); tolerate a non-array result
    // (nothing to iterate) instead of warning in every loop below.
    $taglistArray = $this->GetTaglistArray();
    if (!is_array($taglistArray)) {
        $taglistArray = array();
    }

    // Header row.
    $outputTableArray = array();
    $outputTableArray[0] = array(0 => array(), 1 => array());
    foreach ($taglistArray as $value) {
        $outputTableArray[0][0][$value[2]] = $value[1];
        $outputTableArray[0][1][$value[2]] = $value[1]."(Title)";
    }
    $outputTableArray[0]["other"] = "其他";
    $outputTableArray[0]["page"] = "頁數";
    $outputTableArray[0]["full"] = "全文";

    // Merge adjacent spans of the same tag and drop spans holding only spaces.
    // Tag names are data from the taglist table, so they are quoted before
    // being placed inside a pattern.
    foreach ($taglistArray as $tagValue) {
        $quoted = preg_quote($tagValue[2], "/");
        $content = preg_replace("/<\/".$quoted.">○*<".$quoted.">/u", "", $content);
        $content = preg_replace("/<".$quoted.">[ ]*<\/".$quoted.">/u", "", $content);
    }

    $contentLineArray = explode("<br>", $content);

    $count = 0;
    $pageNow = NULL;
    foreach ($contentLineArray as $value) {
        $count++;
        $recordString = $value;
        $otherString = $recordString;

        // A page marker applies to this line and to all following lines
        // until the next marker.
        if (preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $recordString, $matches)) {
            $pageNow = $matches[2];
        }

        foreach ($taglistArray as $tagValue) {
            $tag = $tagValue[2];
            $spanPattern = "/<".preg_quote($tag, "/").">(.*?)<\/".preg_quote($tag, "/").">/u";

            if (!preg_match_all($spanPattern, $recordString, $matches, PREG_SET_ORDER)) {
                continue;
            }

            foreach ($matches as $matchesValue) {
                // "○" stands for a space in the editor; it is not part of the value.
                $tagged = preg_replace("/○/u", "", $matchesValue[1]);

                // When the span contains 〈…〉 groups, each group is a value;
                // otherwise the whole span is the single value.
                if (preg_match_all("/〈(.*?)〉/u", $tagged, $matches2, PREG_SET_ORDER)) {
                    $cellValues = array();
                    foreach ($matches2 as $matches2Value) {
                        $cellValues[] = $matches2Value[1];
                    }
                } else {
                    $cellValues = array($tagged);
                }

                foreach ($cellValues as $cellValue) {
                    if (isset($outputTableArray[$count][0][$tag])) {
                        $outputTableArray[$count][0][$tag] .= ";".$cellValue;
                    } else {
                        $outputTableArray[$count][0][$tag] = $cellValue;
                    }
                }
            }

            // What remains after all tagged spans are blanked is the 'other' column.
            $otherString = preg_replace($spanPattern, " ", $otherString);
        }

        $otherString = preg_replace("/○/u", "", $otherString);
        $outputTableArray[$count]["other"] = $otherString;
        $outputTableArray[$count]["page"] = $pageNow;
        // NOTE(review): the reviewed copy replaced ">" with ">" and "<" with
        // "<" (no-ops), which looks like entity escaping lost in transit. The
        // escaping is restored here - confirm against the repository file.
        $value = preg_replace("/>/u", "&gt;", $value);
        $value = preg_replace("/</u", "&lt;", $value);
        $outputTableArray[$count]["full"] = $value;
    }

    // Only rows carrying a "person" value are kept (the header row survives
    // only when a tag named "person" exists).
    foreach ($outputTableArray as $arrayIndex => $arrayValue) {
        if (!isset($arrayValue[0]["person"])) {
            unset($outputTableArray[$arrayIndex]);
        }
    }

    $data = array();
    $data['outputTableArray'] = $outputTableArray;
    $data['bookId'] = $bookId;
    $data['section_id'] = $section_id;
    $data['bookName'] = $bookName;
    $data['sectionName'] = $sectionName;

    return $data;
}
166 | |
167 // === for manage tag list === | |
/**
 * Load the tag list for the tag management page.
 *
 * @param array $urlvalues URL values of the current request.
 * @return array 'taglistArray' => map of tag id to array(name, tag, color).
 */
public function EditTaglist($urlvalues) {
    $this->Initialize($urlvalues);

    $rows = $this->GetTaglist();
    $tagsById = array();
    for ($row = mysql_fetch_assoc($rows); $row; $row = mysql_fetch_assoc($rows)) {
        $tagsById[$row['id']] = array($row['name'], $row['tag'], $row['color']);
    }

    return array('taglistArray' => $tagsById);
}
181 | |
/**
 * Pull the four tag fields out of the posted data, undoing magic-quotes
 * escaping when that setting is active.
 *
 * @param array $_postdata POST data with 'id', 'name', 'tag' and 'color'.
 * @return array array(id, name, tag, color), in that order.
 */
private function _GetTag($_postdata) {
    $unescape = get_magic_quotes_gpc();

    $fields = array();
    foreach (array('id', 'name', 'tag', 'color') as $key) {
        $raw = $_postdata[$key];
        $fields[] = $unescape ? stripslashes($raw) : $raw;
    }

    return $fields;
}
196 | |
/**
 * Insert a new row into `taglist` from the posted tag fields.
 *
 * @param array $postdata POST data with 'id', 'name', 'tag' and 'color'.
 * @return void Does nothing when 'id' is empty.
 */
public function NewTagElement($postdata) {
    if (!$postdata['id']) {
        return;
    }

    list($tagId, $tagName, $tagMarkup, $tagColor) = $this->_GetTag($postdata);

    // Values are prepared in column order, then substituted into the statement.
    $sqlValues = array(
        $this->GetSQLValueString($tagId, "int"),
        $this->GetSQLValueString($tagName, "text"),
        $this->GetSQLValueString($tagMarkup, "text"),
        $this->GetSQLValueString($tagColor, "text"),
        $this->GetSQLValueString($this->systemNAME, "text"),
    );
    $sql = vsprintf("INSERT INTO `taglist` (`id`, `name`, `tag`, `color`, `systemName`) VALUES (%s, %s, %s, %s, %s)", $sqlValues);

    mysql_query($sql);
}
211 | |
/**
 * Update name, tag and color of the `taglist` row identified by the posted id.
 *
 * @param array $postdata POST data with 'id', 'name', 'tag' and 'color'.
 * @return void Does nothing when 'id' is empty.
 */
public function SaveTagElement($postdata) {
    if (!$postdata['id']) {
        return;
    }

    list($tagId, $tagName, $tagMarkup, $tagColor) = $this->_GetTag($postdata);

    // Values follow placeholder order: the three SET columns, then the id.
    $sqlValues = array(
        $this->GetSQLValueString($tagName, "text"),
        $this->GetSQLValueString($tagMarkup, "text"),
        $this->GetSQLValueString($tagColor, "text"),
        $this->GetSQLValueString($tagId, "int"),
    );
    $sql = vsprintf("UPDATE taglist SET `name`=%s, `tag`=%s, `color`=%s WHERE `id`=%s", $sqlValues);

    mysql_query($sql);
}
226 | |
/**
 * Delete the `taglist` row identified by the posted id.
 *
 * @param array $postdata POST data; 'id' is the id of the tag to delete.
 * @return void Does nothing when 'id' is empty.
 */
public function DeleteTag($postdata) {
    if (empty($postdata['id'])) {
        return;
    }

    $id = get_magic_quotes_gpc() ? stripslashes($postdata['id']) : $postdata['id'];

    // The id is request input: it was previously formatted into the statement
    // as-is, which allowed SQL injection. It now goes through the same
    // GetSQLValueString(..., "int") call that SaveTagElement() and
    // NewTagElement() use for the id column.
    $query = sprintf("DELETE FROM `taglist` WHERE `id` = %s",
        $this->GetSQLValueString($id, "int"));
    mysql_query($query);
}
234 // =========================== | |
235 | |
236 // === for manage wordlist === | |
/**
 * Load the word lists for the word list management page.
 *
 * @param array $urlvalues URL values of the current request.
 * @return array 'wordlistArray' => map of word list id to its name.
 */
public function EditWordlist($urlvalues) {
    $this->Initialize($urlvalues);

    $rows = $this->GetWordlist();
    $namesById = array();
    for ($row = mysql_fetch_assoc($rows); $row; $row = mysql_fetch_assoc($rows)) {
        $namesById[$row['id']] = $row['name'];
    }

    return array('wordlistArray' => $namesById);
}
249 | |
250 | |
/**
 * Create a new word list: insert its row into `wordlist`, then create the
 * placeholder file data/wordlist/<new id>.txt.
 *
 * @param array $postdata POST data; 'text' is the name of the new list.
 * @return void Does nothing when 'text' is empty or the insert fails.
 */
public function AddNewList($postdata) {
    if (empty($postdata['text'])) {
        return;
    }

    $name = get_magic_quotes_gpc() ? stripslashes($postdata['text']) : $postdata['text'];

    $query = sprintf("INSERT INTO `wordlist` (`name`, `systemName`) VALUES (%s, %s)",
        $this->GetSQLValueString($name, "text"),
        $this->GetSQLValueString($this->systemNAME, "text"));

    // Without this check a failed insert left mysql_insert_id() at 0 and a
    // stray data/wordlist/0.txt was created.
    if (!mysql_query($query)) {
        return;
    }
    $newId = mysql_insert_id();
    if (!$newId) {
        return;
    }

    file_put_contents("data/wordlist/".$newId.".txt", "(empty now)");
}
266 | |
267 | |
/**
 * Save the posted content of a word list to data/wordlist/<filename>.txt.
 *
 * An existing file is first copied to a timestamped backup
 * (<filename>_Y_m_d_H_i_s.txt); the new content then replaces the live file.
 *
 * @param array $postdata POST data; uses 'text' (list content) and 'filename'.
 * @return void Nothing is written when 'text' is empty or 'filename' is missing.
 */
public function SaveWordlist($postdata) {
    if (empty($postdata['text']) || !isset($postdata['filename'])) {
        return;
    }

    // 'filename' comes straight from the request: keep only its last path
    // component so it cannot address anything outside data/wordlist/.
    $name = basename((string) $postdata['filename']);
    if ($name === "") {
        return;
    }
    $target = "data/wordlist/".$name.".txt";

    // Previously, when the file existed, the new text was written to the
    // backup path (overwriting the copy just made) and the live file was never
    // updated. The backup now keeps the old content and the live file
    // receives the new one, as SaveFullText() does.
    if (file_exists($target)) {
        $stamp = date('Y_m_d_H_i_s', time());
        copy($target, "data/wordlist/".$name."_".$stamp.".txt");
    }

    $text = get_magic_quotes_gpc() ? stripslashes($postdata['text']) : $postdata['text'];

    // NOTE(review): the reviewed copy replaced "<br>" with "<br>" (a no-op),
    // which looks like an entity-encoded pattern decoded in transit. The
    // decoding is restored here - confirm against the repository file.
    $text = preg_replace("/&lt;br&gt;/u", "<br>", $text);

    file_put_contents($target, $text);
}
309 | |
310 | |
311 // ======================================= | |
312 | |
/**
 * Comparison callback that orders strings from longest to shortest
 * (by byte length, as measured by strlen()).
 *
 * @param string $a
 * @param string $b
 * @return int Positive when $b is longer than $a, negative when shorter, 0 when equal.
 */
public function sortFunction($a,$b) {
    $lengthOfA = strlen($a);
    $lengthOfB = strlen($b);

    return $lengthOfB - $lengthOfA;
}
316 | |
317 | |
/**
 * Store the section id found in the URL values on the model.
 *
 * @param array $_urlvalues URL values of the current request; 'id' is the section id.
 * @return string|null JSON-encoded error message when 'id' is empty, otherwise nothing.
 */
private function SetSectionId($_urlvalues) {
    // TODO: maybe get user info also
    if ($_urlvalues['id'] == "") {
        // No id in the URL: report it and leave $this->section_id untouched.
        return json_encode("Error: No section id");
    }

    $this->section_id = $_urlvalues['id'];
}
335 | |
/**
 * Return the stored section id.
 *
 * @return mixed The section id, or a JSON-encoded error string when none is set.
 */
private function GetSectionId() {
    return $this->section_id
        ? $this->section_id
        : json_encode("Error: No section id");
}
343 | |
/**
 * Look up book id, page range and name of the current section.
 *
 * @return array|string Array with 'bookId', 'startPage', 'endPage' and
 *                      'sectionName' (all NULL when the section is not found).
 *                      When the section id is not numeric, the value returned
 *                      by GetSectionId() is passed through unchanged.
 */
private function GetSectionInfo() {
    $section_id = $this->GetSectionId();
    if (!is_numeric($section_id)) {
        // Covers the JSON error string GetSectionId() returns when no id is set.
        return $section_id;
    }

    // Defaults so that an empty result set yields NULLs instead of
    // undefined-variable notices.
    $data = array(
        'bookId'      => NULL,
        'startPage'   => NULL,
        'endPage'     => NULL,
        'sectionName' => NULL,
    );

    $result = $this->GetSectionsByID($section_id);
    // If several rows come back, the last one wins.
    while ($row = mysql_fetch_assoc($result)) {
        $data['bookId'] = $row['books_id'];
        $data['startPage'] = $row['start_page'];
        $data['endPage'] = $row['end_page'];
        $data['sectionName'] = $row['name'];
    }

    return $data;
}
368 | |
/**
 * Build the HTML shown in the tagging editor for the current section.
 *
 * A previously saved file <data path>parsing_files/<section id>.txt is used
 * when it exists. Otherwise the text is assembled from the `contents` table
 * for the section's page range, each line prefixed with a 【line】 marker and
 * with any markup in the stored content replaced by "○".
 * In both cases spaces become "○", newlines become "<br>" and every 【…】
 * marker is turned into a link to the page image.
 *
 * @return string HTML for the editor.
 */
private function GetSectionContent() {
    $section_id = $this->GetSectionId();
    $section_info = $this->GetSectionInfo();

    $bookId = $section_info['bookId'];
    $startPage = $section_info['startPage'];
    // This was read from $section_id['endPage'], which never held the end
    // page and broke the upper bound of the query below.
    $endPage = $section_info['endPage'];

    $savedFile = $this->GetDataPath()."parsing_files/".$section_id.".txt";
    if (file_exists($savedFile)) {
        $stringInput = file_get_contents($savedFile);
    } else {
        $query = sprintf("SELECT `content`, `line`, `books_id` FROM `contents` WHERE `books_id`=\"%s\" AND `line`>=%d AND `line`<=%d", $bookId, $startPage, $endPage);
        $result = mysql_query($query);

        $contentString = "";
        while ($row = mysql_fetch_assoc($result)) {
            $contentString .= "【".$row['line']."】".$row['content']."\n";
        }
        // Markup stored in the database text is replaced by the placeholder character.
        $stringInput = preg_replace("/<(.*?)>/u", "○", $contentString);
    }

    // Shared presentation steps for both sources.
    $stringInput = preg_replace("/ /u", "○", $stringInput);
    $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
    $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);

    return $stringInput;
}
401 | |
/**
 * Absolute path of the data directory, with a trailing slash.
 *
 * @return string Current working directory followed by "/data/".
 */
private function GetDataPath() {
    $workingDirectory = getcwd();

    return $workingDirectory."/data/";
}
405 | |
/**
 * Fetch all tags as a plain list.
 *
 * @return array List of array(id, name, tag, color); empty array when there are no tags.
 */
private function GetTaglistArray() {
    // Must start as an array: appending with [] to a string is a fatal error
    // on PHP 7.1+, and callers iterate the result with foreach.
    $taglistArray = array();

    $result = $this->GetTaglist();
    while ($row = mysql_fetch_assoc($result)) {
        $taglistArray[] = array($row['id'], $row['name'], $row['tag'], $row['color']);
    }

    return $taglistArray;
}
417 | |
/**
 * Fetch all word lists, each with its words compiled into one alternation string.
 *
 * For every list the file <data path>wordlist/<id>.txt is read, editor markup
 * (<div>, <span>) is stripped, the words are ordered longest first, "○?" is
 * inserted between the characters of each word, and the words are joined with "|".
 *
 * @return array List of array(id, name, alternation string); empty array when
 *               there are no word lists.
 */
private function GetWordlistArray() {
    // Must start as an array: appending with [] to a string is a fatal error
    // on PHP 7.1+.
    $wordlistArray = array();

    // The files are written under data/wordlist/ by AddNewList() and
    // SaveWordlist(); this used to read from "wordlist/" without the data
    // directory.
    $wordlistDir = $this->GetDataPath()."wordlist/";

    $result = $this->GetWordlist();
    while ($row = mysql_fetch_assoc($result)) {
        $file = $wordlistDir.$row['id'].".txt";
        $listString = is_readable($file) ? file_get_contents($file) : "";
        if ($listString === false) {
            $listString = "";
        }

        // Each <div> starts a new line; the remaining markup is dropped.
        $listString = preg_replace("/<div>/u", "\n", $listString);
        $listString = preg_replace("/<\/div>/u", "", $listString);
        $listString = preg_replace("/<span(.*?)>/u", "", $listString);
        $listString = preg_replace("/<\/span>/u", "", $listString);

        $words = explode("\n", $listString);
        // sortFunction is a method of this class, so the callback has to name
        // the object; the bare string 'sortFunction' refers to a global function.
        usort($words, array($this, 'sortFunction'));

        $alternatives = array();
        foreach ($words as $word) {
            if ($word == "") {
                continue;
            }
            // Split between characters and allow an optional "○" at each gap.
            $alternatives[] = implode("○?", preg_split("/(?<!^)(?!$)/u", $word));
        }
        $listString = implode("|", $alternatives);

        // Guard the offset read: the string is empty when the list has no words.
        if ($listString !== "" && $listString[0] == "|") {
            $listString = substr($listString, 1);
        }

        $wordlistArray[] = array($row['id'], $row['name'], $listString);
    }

    return $wordlistArray;
}
448 | |
/**
 * Look up the name belonging to the given book id.
 *
 * NOTE(review): the lookup goes through GetSectionsByID(), the same call
 * GetSectionInfo() uses with a section id. Passing a book id to it looks
 * unintended - confirm which table should supply the book name.
 *
 * @param mixed $bookId Book id as returned by GetSectionInfo().
 * @return array 'bookName' => name from the last row returned, or NULL when there is none.
 */
protected function GetBooksInfo($bookId) {
    // Default so that an empty result set yields NULL instead of an
    // undefined-variable notice.
    $bookName = NULL;

    $result = $this->GetSectionsByID($bookId);
    while ($row = mysql_fetch_assoc($result)) {
        $bookName = $row['name'];
    }

    return array('bookName' => $bookName);
}
460 | |
461 | |
462 } | |
463 | |
464 | |
465 | |
466 ?> |