Mercurial > hg > extraction-interface
comparison develop/models/extractapp.php @ 6:63e08b98032f
Rewrite the extraction interface into a PHP MVC architecture.
(Although the JS has not been rewritten into MVC, it has been fitted into the current PHP MVC architecture.)
- The root of the new PHP MVC is at 'develop/'.
- The extraction interface is called "Extractapp" and has several actions, e.g. TaggingText, EditWordlist, EditTaglist, ExportTable.
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Thu, 05 Feb 2015 16:07:53 +0100 |
| parents | |
| children | 584b1623e9ef |
comparison
equal
deleted
inserted
replaced
| 5:cbbb7ef22394 | 6:63e08b98032f |
|---|---|
| 1 | |
| 2 | |
| 3 <?php | |
| 4 | |
| 5 class ExtractappModel extends BaseModel{ | |
| 6 | |
/**
 * Default action: returns a fixed list of three placeholder strings.
 *
 * @return array
 */
public function Index() {
    $placeholders = array();
    $placeholders[] = "Index Value 1";
    $placeholders[] = "Value 2";
    $placeholders[] = "Value 3";

    return $placeholders;
}
| 10 | |
| 11 protected $section_id, $data_path; | |
| 12 | |
| 13 | |
/**
 * Prepares the model for a request by recording the section id
 * carried in the URL values.
 *
 * @param array $_urlvalues URL values; SetSectionId() reads its 'id' entry.
 * @return void
 */
private function Initialize($_urlvalues) {
    // Whatever SetSectionId() returns is deliberately not propagated.
    $urlInput = $_urlvalues;
    $this->SetSectionId($urlInput);
}
| 17 | |
| 18 // === for tagging === | |
/**
 * Collects everything the tagging view needs for one section.
 *
 * @param array $urlvalues URL values, passed on to Initialize().
 * @return array Keys: 'stringInput' (section text), 'taglistArray',
 *               'wordlistArray' and 'section_id'.
 */
public function StartTagging($urlvalues) {
    $this->Initialize($urlvalues);

    // Same lookup order as before: id, text, tag list, word list.
    $currentSection = $this->GetSectionId();
    $sectionText = $this->GetSectionContent();
    $tags = $this->GetTaglistArray();
    $words = $this->GetWordlistArray();

    return array(
        'stringInput' => $sectionText,
        'taglistArray' => $tags,
        'wordlistArray' => $words,
        'section_id' => $currentSection,
    );
}
| 41 | |
/**
 * Saves the edited full text of a section to
 * data/parsing_files/<filename>.txt.
 *
 * When the target file already exists it is first copied to
 * <filename>_<Y_m_d_H_i_s>.txt so the previous version is kept.
 * Nothing is written when 'text' is empty, when 'filename' is missing,
 * or when one of the UTF-8 regex replacements fails.
 *
 * @param array $postdata POST values; reads 'text' and 'filename'.
 * @return void
 */
public function SaveFullText($postdata) {
    if (empty($postdata['text']) || !isset($postdata['filename'])) {
        return;
    }

    // 'filename' comes from the client: keep only its last path component
    // so it cannot point outside data/parsing_files/.
    $name = basename($postdata['filename']);
    if ($name === '') {
        return;
    }
    $directory = "data/parsing_files/";
    $target = $directory.$name.".txt";

    $text = $postdata['text'];
    // get_magic_quotes_gpc() does not exist on every PHP version.
    if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
        $text = stripslashes($text);
    }

    // Convert the editor markup back to the plain-text storage format.
    // NOTE(review): as written here, the '/&/' replacement and the second
    // '<br>' replacement are no-ops; they look like HTML entities that were
    // lost somewhere on the way - confirm against the repository copy.
    $steps = array(
        array("/【<a(.*?)>(.*?)<\/a>】/u", "【\\2】"), // drop the page link, keep the page number
        array('/&/u', "&"),
        array("/○/u", " "),                          // placeholder circle back to a space
        array("/<br>/u", "\n"),
        array("/<br>/u", "\n"),
    );
    foreach ($steps as $step) {
        $text = preg_replace($step[0], $step[1], $text);
        if ($text === null) {
            // Regex failure (e.g. invalid UTF-8): do not overwrite the stored file.
            return;
        }
    }

    if (file_exists($target)) {
        $stamp = date('Y_m_d_H_i_s');
        copy($target, $directory.$name."_".$stamp.".txt");
    }

    file_put_contents($target, $text);
}
| 65 | |
| 66 | |
| 67 | |
| 68 // === for export table === | |
/**
 * Builds the export table for a tagged section.
 *
 * Row 0 is the header: [0][0][tag] holds each tag's name, [0][1][tag] the
 * name with "(Title)" appended, plus the "other"/"page"/"full" captions.
 * Every "<br>"-separated line of the posted content becomes one row
 * (numbered from 1) holding the tagged values per tag (joined with ";"),
 * the remaining untagged text, the current page marker and the full line.
 * Rows without a value for the tag "person" are dropped at the end.
 *
 * @param array $urlvalues URL values, passed on to Initialize().
 * @param array $postdata  POST values; reads 'content'.
 * @return array Keys: 'outputTableArray', 'bookId', 'section_id',
 *               'bookName', 'sectionName'.
 */
public function ExportTable($urlvalues, $postdata) {
    $this->Initialize($urlvalues);
    $content = isset($postdata['content']) ? $postdata['content'] : "";

    $section_id = $this->GetSectionId();

    // GetSectionInfo() hands back an error string instead of an array when
    // there is no usable section id; treat that as "no information".
    $section_info = $this->GetSectionInfo();
    if (!is_array($section_info)) {
        $section_info = array();
    }
    $sectionName = isset($section_info['sectionName']) ? $section_info['sectionName'] : null;
    $bookId = isset($section_info['bookId']) ? $section_info['bookId'] : null;

    $books_info = $this->GetBooksInfo($bookId);
    $bookName = isset($books_info['bookName']) ? $books_info['bookName'] : null;

    $taglistArray = $this->GetTaglistArray();
    if (!is_array($taglistArray)) {
        $taglistArray = array();
    }

    // Tag names end up inside regular expressions: quote them once up front.
    $tags = array();
    foreach ($taglistArray as $tagValue) {
        $tags[] = array($tagValue[2], preg_quote($tagValue[2], '/'));
    }

    // Header row.
    $header = array(0 => array(), 1 => array());
    foreach ($taglistArray as $tagValue) {
        $header[0][$tagValue[2]] = $tagValue[1];
        $header[1][$tagValue[2]] = $tagValue[1]."(Title)";
    }
    $header["other"] = "其他";
    $header["page"] = "頁數";
    $header["full"] = "全文";
    $outputTableArray = array(0 => $header);

    // Merge adjacent runs of the same tag and drop tags holding only spaces.
    foreach ($tags as $tagEntry) {
        $quoted = $tagEntry[1];
        $content = preg_replace("/<\/".$quoted.">○*<".$quoted.">/u", "", $content);
        $content = preg_replace("/<".$quoted.">[ ]*<\/".$quoted.">/u", "", $content);
    }

    $count = 0;
    $pageNow = null;
    foreach (explode("<br>", $content) as $line) {
        $count++;
        $otherString = $line;

        // A page marker applies to this line and every following one.
        if (preg_match("/【<a(.*?)>(.*?)<\/a>】/u", $line, $pageMatch)) {
            $pageNow = $pageMatch[2];
        }

        foreach ($tags as $tagEntry) {
            $tag = $tagEntry[0];
            $tagRegex = "/<".$tagEntry[1].">(.*?)<\/".$tagEntry[1].">/u";
            if (!preg_match_all($tagRegex, $line, $tagMatches, PREG_SET_ORDER)) {
                continue;
            }

            foreach ($tagMatches as $tagMatch) {
                $inner = preg_replace("/○/u", "", $tagMatch[1]);

                // Prefer the 〈...〉 parts when present, otherwise the whole tag body.
                $pieces = array();
                if (preg_match_all("/〈(.*?)〉/u", $inner, $bracketed, PREG_SET_ORDER)) {
                    foreach ($bracketed as $bracketMatch) {
                        $pieces[] = $bracketMatch[1];
                    }
                } else {
                    $pieces[] = $inner;
                }

                foreach ($pieces as $piece) {
                    if (isset($outputTableArray[$count][0][$tag])) {
                        $outputTableArray[$count][0][$tag] .= ";".$piece;
                    } else {
                        $outputTableArray[$count][0][$tag] = $piece;
                    }
                }
            }

            // Whatever was tagged does not belong to the "other" column.
            $otherString = preg_replace($tagRegex, " ", $otherString);
        }

        $otherString = preg_replace("/○/u", "", $otherString);
        $outputTableArray[$count]["other"] = $otherString;
        $outputTableArray[$count]["page"] = $pageNow;
        // NOTE(review): as written here both replacements map a character to
        // itself; they look like HTML-entity conversions whose entities were
        // lost - confirm against the repository copy.
        $line = preg_replace("/>/u", ">", $line);
        $line = preg_replace("/</u", "<", $line);
        $outputTableArray[$count]["full"] = $line;
    }

    // Keep only rows that carry a "person" value (the tag name is hard-coded;
    // the header row survives only when a tag called "person" exists).
    foreach ($outputTableArray as $arrayIndex => $arrayValue) {
        if (!isset($arrayValue[0]["person"])) {
            unset($outputTableArray[$arrayIndex]);
        }
    }

    return array(
        'outputTableArray' => $outputTableArray,
        'bookId' => $bookId,
        'section_id' => $section_id,
        'bookName' => $bookName,
        'sectionName' => $sectionName,
    );
}
| 166 | |
| 167 // === for manage tag list === | |
/**
 * Loads the tag list for the tag management view.
 *
 * @param array $urlvalues URL values, passed on to Initialize().
 * @return array 'taglistArray' => map of tag id to array(name, tag, color).
 */
public function EditTaglist($urlvalues) {
    $this->Initialize($urlvalues);

    $rows = $this->GetTaglist();
    $tagsById = array();
    while ($tagRow = mysql_fetch_assoc($rows)) {
        $entry = array($tagRow['name'], $tagRow['tag'], $tagRow['color']);
        $tagsById[$tagRow['id']] = $entry;
    }

    return array('taglistArray' => $tagsById);
}
| 181 | |
/**
 * Extracts the tag fields from posted data.
 *
 * Slashes added by magic quotes are removed when that feature is active.
 * A field missing from the input is returned as null.
 *
 * @param array $_postdata POST values; reads 'id', 'name', 'tag', 'color'.
 * @return array array($id, $name, $tag, $color), in that order.
 */
private function _GetTag($_postdata) {
    // get_magic_quotes_gpc() does not exist on every PHP version.
    $unescape = function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();

    $fields = array();
    foreach (array('id', 'name', 'tag', 'color') as $key) {
        $raw = isset($_postdata[$key]) ? $_postdata[$key] : null;
        $fields[] = ($unescape && $raw !== null) ? stripslashes($raw) : $raw;
    }

    return $fields;
}
| 196 | |
/**
 * Inserts a new row into `taglist` from posted tag data.
 * Does nothing when the posted 'id' is empty.
 *
 * @param array $postdata POST values; reads 'id', 'name', 'tag', 'color'.
 * @return void
 */
public function NewTagElement($postdata) {
    if (!$postdata['id']) {
        return;
    }

    list($tagId, $label, $tagName, $tagColor) = $this->_GetTag($postdata);

    // Each value is formatted through GetSQLValueString() before being placed in the statement.
    $values = array(
        $this->GetSQLValueString($tagId, "int"),
        $this->GetSQLValueString($label, "text"),
        $this->GetSQLValueString($tagName, "text"),
        $this->GetSQLValueString($tagColor, "text"),
        $this->GetSQLValueString($this->systemNAME, "text"),
    );
    $insertSql = vsprintf("INSERT INTO `taglist` (`id`, `name`, `tag`, `color`, `systemName`) VALUES (%s, %s, %s, %s, %s)", $values);

    mysql_query($insertSql);
}
| 211 | |
/**
 * Updates name, tag and color of an existing `taglist` row.
 * Does nothing when the posted 'id' is empty.
 *
 * @param array $postdata POST values; reads 'id', 'name', 'tag', 'color'.
 * @return void
 */
public function SaveTagElement($postdata) {
    if (!$postdata['id']) {
        return;
    }

    list($tagId, $label, $tagName, $tagColor) = $this->_GetTag($postdata);

    // Placeholder order: name, tag, color, then the id for the WHERE clause.
    $values = array(
        $this->GetSQLValueString($label, "text"),
        $this->GetSQLValueString($tagName, "text"),
        $this->GetSQLValueString($tagColor, "text"),
        $this->GetSQLValueString($tagId, "int"),
    );
    $updateSql = vsprintf("UPDATE taglist SET `name`=%s, `tag`=%s, `color`=%s WHERE `id`=%s", $values);

    mysql_query($updateSql);
}
| 226 | |
/**
 * Deletes one row from `taglist`.
 *
 * The id arrives from the client, so it is validated as an integer before
 * it is placed in the statement; anything else is ignored rather than
 * being spliced into the SQL text.
 *
 * @param array $postdata POST values; reads 'id'.
 * @return void
 */
public function DeleteTag($postdata) {
    if (empty($postdata['id'])) {
        return;
    }

    $id = filter_var($postdata['id'], FILTER_VALIDATE_INT);
    if ($id === false) {
        // Not an integer: refuse instead of building a statement from it.
        return;
    }

    $query = sprintf("DELETE FROM `taglist` WHERE `id` = %d", $id);
    mysql_query($query);
}
| 234 // =========================== | |
| 235 | |
| 236 // === for manage wordlist === | |
/**
 * Loads the word lists for the word list management view.
 *
 * @param array $urlvalues URL values, passed on to Initialize().
 * @return array 'wordlistArray' => map of word list id to its name.
 */
public function EditWordlist($urlvalues) {
    $this->Initialize($urlvalues);

    $rows = $this->GetWordlist();
    $namesById = array();
    while ($listRow = mysql_fetch_assoc($rows)) {
        $namesById[$listRow['id']] = $listRow['name'];
    }

    return array('wordlistArray' => $namesById);
}
| 249 | |
| 250 | |
/**
 * Creates a new word list: inserts a `wordlist` row named after the posted
 * text and creates its data file data/wordlist/<new id>.txt with the
 * placeholder content "(empty now)".
 *
 * The file is only created when the insert succeeded and produced an id.
 *
 * @param array $postdata POST values; reads 'text' (the list name).
 * @return void
 */
public function AddNewList($postdata) {
    if (empty($postdata['text'])) {
        return;
    }

    $name = $postdata['text'];
    // get_magic_quotes_gpc() does not exist on every PHP version.
    if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
        $name = stripslashes($name);
    }

    $query = sprintf("INSERT INTO `wordlist` (`name`, `systemName`) VALUES (%s, %s)",
        $this->GetSQLValueString($name, "text"),
        $this->GetSQLValueString($this->systemNAME, "text"));
    if (!mysql_query($query)) {
        // Insert failed: there is no new id to create a file for.
        return;
    }

    $newId = mysql_insert_id();
    if (!$newId) {
        return;
    }

    file_put_contents("data/wordlist/".$newId.".txt", "(empty now)");
}
| 266 | |
| 267 | |
/**
 * Saves the edited content of a word list to data/wordlist/<filename>.txt.
 *
 * When the file already exists its previous content is first copied to
 * <filename>_<Y_m_d_H_i_s>.txt; the new text is then written to the live
 * file (the same backup-then-overwrite scheme SaveFullText() uses).
 * Nothing is written when 'text' is empty, 'filename' is missing, or the
 * regex replacement fails.
 *
 * @param array $postdata POST values; reads 'text' and 'filename'.
 * @return void
 */
public function SaveWordlist($postdata) {
    if (empty($postdata['text']) || !isset($postdata['filename'])) {
        return;
    }

    // 'filename' comes from the client: keep only its last path component
    // so it cannot point outside data/wordlist/.
    $name = basename($postdata['filename']);
    if ($name === '') {
        return;
    }
    $directory = "data/wordlist/";
    $target = $directory.$name.".txt";

    $text = $postdata['text'];
    // get_magic_quotes_gpc() does not exist on every PHP version.
    if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) {
        $text = stripslashes($text);
    }

    // NOTE(review): as written here this replacement maps "<br>" to itself;
    // it looks like an HTML-entity conversion whose entities were lost -
    // confirm against the repository copy.
    $text = preg_replace("/<br>/u", "<br>", $text);
    if ($text === null) {
        // Regex failure (e.g. invalid UTF-8): do not overwrite the stored list.
        return;
    }

    if (file_exists($target)) {
        $stamp = date('Y_m_d_H_i_s');
        copy($target, $directory.$name."_".$stamp.".txt");
    }

    // Always write the live file, so the saved text is what gets read back.
    file_put_contents($target, $text);
}
| 309 | |
| 310 | |
| 311 // ======================================= | |
| 312 | |
/**
 * Comparison callback that orders strings by byte length, longest first.
 *
 * @param string $a
 * @param string $b
 * @return int Positive when $b is longer than $a, negative when shorter,
 *             0 when both have the same length.
 */
public function sortFunction($a,$b) {
    $lengthOfA = strlen($a);
    $lengthOfB = strlen($b);

    return $lengthOfB - $lengthOfA;
}
| 316 | |
| 317 | |
/**
 * Stores the section id found in the URL values on this model.
 *
 * @param array $_urlvalues URL values; reads 'id'.
 * @return string|null JSON-encoded error message when 'id' is empty
 *                     (the stored id is then left untouched), otherwise nothing.
 */
private function SetSectionId($_urlvalues) {
    // TODO: maybe get user info also
    $requestedId = $_urlvalues['id'];

    if ($requestedId == "") {
        return json_encode("Error: No section id");
    }

    $this->section_id = $requestedId;
}
| 335 | |
/**
 * Returns the section id stored on this model.
 *
 * @return mixed The stored id, or a JSON-encoded error message when no
 *               (truthy) id has been stored.
 */
private function GetSectionId() {
    return $this->section_id
        ? $this->section_id
        : json_encode("Error: No section id");
}
| 343 | |
/**
 * Looks up the current section's book id, page range and name.
 *
 * @return array|string Array with keys 'bookId', 'startPage', 'endPage',
 *                      'sectionName' (all null when no row is found);
 *                      when the stored section id is not numeric, the
 *                      value of GetSectionId() is returned unchanged
 *                      (its error message).
 */
private function GetSectionInfo() {
    $section_id = $this->GetSectionId();
    if (!is_numeric($section_id)) {
        return $section_id;
    }

    // Defaults, so a missing row yields nulls instead of undefined variables.
    $data = array(
        'bookId' => null,
        'startPage' => null,
        'endPage' => null,
        'sectionName' => null,
    );

    $result = $this->GetSectionsByID($section_id);
    if ($result) {
        // Should several rows come back, the last one wins (as before).
        while ($row = mysql_fetch_assoc($result)) {
            $data['bookId'] = $row['books_id'];
            $data['startPage'] = $row['start_page'];
            $data['endPage'] = $row['end_page'];
            $data['sectionName'] = $row['name'];
        }
    }

    return $data;
}
| 368 | |
/**
 * Returns the text of the current section, prepared for the tagging editor.
 *
 * A previously saved file <data path>parsing_files/<section id>.txt takes
 * precedence; otherwise the text is assembled from the `contents` table,
 * one "【line】content" entry per row, with any <...> markup replaced by "○".
 * In both cases spaces become "○", newlines become "<br>", and every
 * 【n】 marker is turned into a link to the page image viewer.
 *
 * @return string Editor-ready text; "" when no section information is
 *                available (GetSectionInfo() did not return an array).
 */
private function GetSectionContent() {
    $section_id = $this->GetSectionId();
    $section_info = $this->GetSectionInfo();
    if (!is_array($section_info)) {
        // No usable section id: nothing to load.
        return "";
    }

    $bookId = $section_info['bookId'];
    $startPage = $section_info['startPage'];
    $endPage = $section_info['endPage'];

    $savedFile = $this->GetDataPath()."parsing_files/".$section_id.".txt";
    if (file_exists($savedFile)) {
        $stringInput = file_get_contents($savedFile);
    } else {
        $query = sprintf("SELECT `content`, `line`, `books_id` FROM `contents` WHERE `books_id`=\"%s\" AND `line`>=%d AND `line`<=%d", mysql_real_escape_string($bookId), $startPage, $endPage);
        $result = mysql_query($query);

        $contentString = "";
        if ($result) {
            while ($row = mysql_fetch_assoc($result)) {
                $contentString .= "【".$row['line']."】".$row['content']."\n";
            }
        }
        // Database text may carry markup; the saved files do not get this step.
        $stringInput = preg_replace("/<(.*?)>/u", "○", $contentString);
    }

    // Shared post-processing for both sources.
    $stringInput = preg_replace("/ /u", "○", $stringInput);
    $stringInput = preg_replace("/\n/u", "<br>", $stringInput);
    $stringInput = preg_replace("/【(.*?)】/u", "【<a href=\"review_index_xml_images.php?books_id=".$bookId."&pages=\\1&entry=0\" target=\"_bookImg\">\\1</a>】", $stringInput);

    return $stringInput;
}
| 401 | |
/**
 * Returns the data directory: the current working directory followed by "/data/".
 *
 * @return string
 */
private function GetDataPath() {
    $workingDirectory = getcwd();

    return $workingDirectory."/data/";
}
| 405 | |
/**
 * Returns every tag as a list of array(id, name, tag, color) entries.
 *
 * @return array Possibly empty list; always an array, so callers can
 *               iterate it without checking.
 */
private function GetTaglistArray() {
    // Start from an array: appending with [] to a string is an error on
    // newer PHP versions, and callers iterate over the result.
    $taglistArray = array();

    $result = $this->GetTaglist();
    if (!$result) {
        return $taglistArray;
    }

    while ($row = mysql_fetch_assoc($result)) {
        $taglistArray[] = array($row['id'], $row['name'], $row['tag'], $row['color']);
    }

    return $taglistArray;
}
| 417 | |
/**
 * Returns every word list as array(id, name, pattern) entries.
 *
 * The pattern is built from the list's text file: markup left by the
 * editor (<div>, <span>) is removed, the words are ordered longest first,
 * "○?" is inserted between the characters of each word, and the words are
 * joined with "|".
 *
 * @return array Possibly empty list; always an array.
 */
private function GetWordlistArray() {
    $wordlistArray = array();

    $result = $this->GetWordlist();
    if (!$result) {
        return $wordlistArray;
    }

    $dataDirectory = $this->GetDataPath()."wordlist/";

    while ($row = mysql_fetch_assoc($result)) {
        // The list files are written below data/wordlist/; fall back to the
        // older relative location when no file is found there.
        $listFile = $dataDirectory.$row['id'].".txt";
        if (!is_file($listFile)) {
            $listFile = "wordlist/".$row['id'].".txt";
        }
        $listString = is_file($listFile) ? file_get_contents($listFile) : "";
        if ($listString === false) {
            $listString = "";
        }

        // Strip editor markup; each <div> starts a new line.
        $listString = preg_replace("/<div>/u", "\n", $listString);
        $listString = preg_replace("/<\/div>/u", "", $listString);
        $listString = preg_replace("/<span(.*?)>/u", "", $listString);
        $listString = preg_replace("/<\/span>/u", "", $listString);

        // One word per line, blank lines dropped.
        $words = array();
        foreach (explode("\n", $listString) as $word) {
            if ($word != "") {
                $words[] = $word;
            }
        }

        // Longest first. The comparator is a method of this class, so it has
        // to be passed as an object callback, not as a bare function name.
        usort($words, array($this, 'sortFunction'));

        // Split each word into characters and allow an optional "○" between them.
        foreach ($words as $index => $word) {
            $words[$index] = implode("○?", preg_split("/(?<!^)(?!$)/u", $word));
        }

        $wordlistArray[] = array($row['id'], $row['name'], implode("|", $words));
    }

    return $wordlistArray;
}
| 448 | |
/**
 * Looks up the display name for a book id.
 *
 * NOTE(review): the lookup goes through GetSectionsByID(), the same helper
 * GetSectionInfo() uses for section ids - confirm that this is the
 * intended source for a book's name.
 *
 * @param mixed $bookId Book id to look up.
 * @return array 'bookName' => the 'name' column of the last row found,
 *               or null when nothing was found.
 */
protected function GetBooksInfo($bookId) {
    // Default, so a missing row yields null instead of an undefined variable.
    $bookName = null;

    $result = $this->GetSectionsByID($bookId);
    if ($result) {
        while ($row = mysql_fetch_assoc($result)) {
            $bookName = $row['name'];
        }
    }

    return array('bookName' => $bookName);
}
| 460 | |
| 461 | |
| 462 } | |
| 463 | |
| 464 | |
| 465 | |
| 466 ?> |
