Mercurial > hg > extraction-interface
annotate interface/insert_new_columns_into_books/get_data_from_sinica.php @ 20:04db1dd9d10d
update topic_regex_relation table when saving (new) regex file
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 23 Feb 2015 14:34:43 +0100 |
parents | b12c99b7c3f0 |
children |
rev | line source |
---|---|
0
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
2 <html> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
3 <head> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
4 <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
5 <style type="text/css"> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
6 .row{ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
7 display:block; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
8 margin-bottom:20px; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
9 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
10 .column{ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
11 display:inline-block; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
12 width:90px; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
13 vertical-align:top; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
14 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
15 </style> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
16 <!--<script src="js/check_sections.js" charset="utf-8"></script>!--> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
17 </head> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
18 <body> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
19 <?php |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
20 set_time_limit(0); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
21 ini_set('memory_limit', '-1'); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
22 $columnNameArray=array(); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
23 $count=0; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
24 for($i=1; $i<=71; $i++){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
25 $url="http://webgis.sinica.edu.tw/place/query.asp?Page=".$i."&Page_setup=500&A1=%AC%D9%A5%F7&B1=containing&C1=&D1=AND&A2=99&B2=containing&C2=&D2=AND&A3=99&B3=containing&C3="; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
26 $bookInfoArray=getBookInfo($url); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
27 $fileName=sprintf("%02d",$i).".csv"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
28 saveBookInfo($bookInfoArray,$fileName); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
29 $count+=sizeof($bookInfoArray); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
30 //break; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
31 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
32 saveColumnName($columnNameArray); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
33 echo "<br><br>".$count."<br>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
34 ?> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
35 </body> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
36 </html> |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
37 <?php |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
38 function getBookInfo($url){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
39 global $columnNameArray; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
40 $bookListDoc=new DOMDocument(); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
41 $data=getData($url); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
42 $data=mb_convert_encoding($data,"HTML-ENTITIES","BIG5"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
43 $bookInfoArray=array(); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
44 @$bookListDoc->loadHTML($data); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
45 $bookList=$bookListDoc->getElementsByTagName("a"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
46 $bookNumber=0; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
47 foreach($bookList as $entry){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
48 $link=$entry->attributes->getNamedItem("href")->value; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
49 $pattern="/detail.asp\?ID=([0-9]+)&Source=([0-9]+)/u"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
50 if(preg_match($pattern,$link,$match)){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
51 $id=$match[1]; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
52 $source=$match[2]; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
53 $link="http://webgis.sinica.edu.tw/place/".$link; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
54 $data=getData($link); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
55 /* |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
56 $bookInfoDoc=new DOMDocument(); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
57 @$bookInfoDoc->loadHTML($data); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
58 $bookInfoDoc->preserveWhiteSpace = false; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
59 $th=$bookInfoDoc->getElementsByTagName("th"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
60 $bookInfoArray[$bookNumber][0]=$source; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
61 $columnNameArray[$source][0]=$source; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
62 $infoNumber=1; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
63 foreach($th as $row){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
64 $columnName=$row->nodeValue; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
65 $columnName=str_replace(":","",$columnName); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
66 $columnName=trim($columnName); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
67 $columnNameArray[$source][$infoNumber]=$columnName; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
68 $info=$row->nextSibling->nodeValue; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
69 $bookInfoArray[$bookNumber][$infoNumber]=$info; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
70 $infoNumber++; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
71 }*/ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
72 $bookInfoArray[$bookNumber][0]=$id; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
73 $bookInfoArray[$bookNumber][1]=$source; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
74 $bookInfoArray['big5'][$bookNumber][0]=$id; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
75 $bookInfoArray['big5'][$bookNumber][1]=$source; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
76 $columnNameArray[$source][0]=$source; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
77 $columnNameArray['big5'][$source][0]=$source; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
78 |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
79 $infoNumber=2; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
80 //$pattern='/<th class="calc" align="right" valign="top" width="100">(.*)<\/td><td class="calc" align="left" valign="top">(.*)<\/td>/'; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
81 $pattern='/<th class="calc" align="right" valign="top" width="100">(.*)<\/td>[\s]*<td class="calc" align="left" valign="top">(.*)<\/td>/'; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
82 //$pattern='/<td[^>]+>(.*)<\/td>/'; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
83 if(preg_match_all($pattern,$data,$match)){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
84 foreach($match[1] as $idx=>$th){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
85 $pattern="/[\x81-\xA0][\x40-\xFE]/"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
86 $th=str_replace("\xA1\x47","",$th);//remove colons |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
87 $th=trim($th); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
88 $columnNameArray['big5'][$source][$infoNumber]=$th; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
89 $th=preg_replace($pattern,"\xA1\xBC",$th);//replace the self-defined characters with a square |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
90 $th=mb_convert_encoding($th,"UTF-8","BIG5"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
91 $columnNameArray[$source][$infoNumber]=$th; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
92 $info=$match[2][$idx]; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
93 $bookInfoArray['big5'][$bookNumber][$infoNumber]=$info; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
94 $info=preg_replace($pattern,"\xA1\xBC",$info);//replace the self-defined characters with a square |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
95 $info=mb_convert_encoding($info,"UTF-8","BIG5"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
96 $bookInfoArray[$bookNumber][$infoNumber]=$info; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
97 $infoNumber++; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
98 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
99 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
100 $bookNumber++; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
101 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
102 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
103 return $bookInfoArray; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
104 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
105 function saveColumnName($columnNameArray){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
106 $fp=fopen("./data_from_sinica/column_name.csv","w"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
107 foreach($columnNameArray as $idx=>$columnName){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
108 if($idx==="big5"){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
109 continue; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
110 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
111 echo "<div class='row'>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
112 fputcsv($fp,$columnName,"\t"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
113 foreach($columnName as $name){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
114 echo "<div class='column'>".$name."</div>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
115 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
116 echo "</div>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
117 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
118 fclose($fp); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
119 $fp=fopen("./data_from_sinica/big5_column_name.csv","w"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
120 foreach($columnNameArray['big5'] as $idx=>$columnName){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
121 echo "<div class='row'>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
122 fputcsv($fp,$columnName,"\t"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
123 foreach($columnName as $name){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
124 echo "<div class='column'>".$name."</div>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
125 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
126 echo "</div>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
127 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
128 fclose($fp); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
129 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
130 function saveBookInfo($bookInfoArray,$fileName){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
131 $fp=fopen("./data_from_sinica/".$fileName,"w"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
132 foreach($bookInfoArray as $idx=>$book){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
133 if($idx==="big5"){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
134 continue; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
135 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
136 echo "<div class='row'>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
137 fputcsv($fp,$book,"\t"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
138 foreach($book as $info){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
139 echo "<div class='column'>".$info."</div>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
140 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
141 echo "</div>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
142 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
143 fclose($fp); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
144 $fp=fopen("./data_from_sinica/big5_".$fileName,"w"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
145 foreach($bookInfoArray['big5'] as $idx=>$book){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
146 echo "<div class='row'>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
147 fputcsv($fp,$book,"\t"); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
148 foreach($book as $info){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
149 echo "<div class='column'>".$info."</div>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
150 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
151 echo "</div>"; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
152 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
153 fclose($fp); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
154 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
155 function getData($url){ |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
156 $curl=curl_init(); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
157 $timeout=5000; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
158 curl_setopt($curl,CURLOPT_URL,$url); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
159 curl_setopt($curl,CURLOPT_RETURNTRANSFER,1); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
160 curl_setopt($curl,CURLOPT_CONNECTTIMEOUT,$timeout); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
161 $data=curl_exec($curl); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
162 curl_close($curl); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
163 $data=str_replace(" ","",$data); |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
164 return $data; |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
165 } |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
166 |
b12c99b7c3f0
commit for previous development
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
167 ?> |