0
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1 from urllib import request
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
2
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
3 class converter:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
4
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
5 out ="/var/tmp/out.csv"
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
6 url = "http://md.mpiwg-berlin.mpg.de/purls/search?q=" # /permanent/library/W028ATN3
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
7 def read(self,fn):
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
8
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
9 of = open(self.out,"w")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
10
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
11 fl = open(fn,"r")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
12
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
13 for line in fl.readlines():
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
14 line = line.replace("\n","")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
15 line = line.replace('"','')
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
16 splitted = line.split(",")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
17
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
18 path = splitted[0].replace('"','')
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
19 path = path.replace("/mpiwg/online","")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
20
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
21 qr = self.url + path
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
22
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
23 g = request.urlopen(qr, data=bytearray(path,"utf-8"))
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
24
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
25 p = g.read().decode('utf-8')
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
26
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
27 spl = p.split("\t")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
28
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
29 if len(spl) > 1:
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
30 dri = spl[1]
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
31
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
32 splitted.append(dri)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
33
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
34 print (splitted)
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
35
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
36 of.write(",".join(splitted)+"\n")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
37
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
38
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
39 of.close()
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
40
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
41
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
42
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
43
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
44 cv =converter()
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
45
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
46 cv.read("/var/tmp/archive.csv")
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
47
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
48
|
Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
49
|