annotate SrvTxtUtils.py @ 9:eeaad777d3d7

more work for non-bib metadata
author casties
date Thu, 28 Jul 2011 18:08:58 +0200
parents e4bae49e657b
children 7f0e2b656e5c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
1 """Utility methods for handling XML, reading HTTP, etc"""
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
2
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
3 import sys
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
4 import urllib
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
5 import urllib2
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
6 import logging
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
7
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
8
1
e4bae49e657b more renovation
casties
parents: 0
diff changeset
# Version string of this utility module.
srvTxtUtilsVersion = "1.1"
0
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
10
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
def getInt(number, default=0):
    """Return ``number`` converted to an int, falling back to ``default``.

    number  -- any value accepted by ``int()`` (string, float, ...)
    default -- value used when the conversion of ``number`` fails
               (default 0); it is itself passed through ``int()``, so a
               default that cannot be converted still raises.
    """
    try:
        return int(number)
    except Exception:
        # Best-effort conversion: every ordinary failure yields the default,
        # but KeyboardInterrupt/SystemExit are no longer swallowed as they
        # were by the former bare "except:".
        return int(default)
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
17
1
e4bae49e657b more renovation
casties
parents: 0
diff changeset
def getAt(array, idx, default=None):
    """Return ``array[idx]``, or ``default`` if the lookup fails.

    array   -- any object supporting subscription (list, tuple, dict, ...);
               objects that do not support it (e.g. None) yield ``default``
    idx     -- index or key to look up
    default -- value returned when the lookup raises (default None)
    """
    try:
        return array[idx]
    except Exception:
        # Best-effort lookup: IndexError, KeyError, TypeError etc. all yield
        # the default, but KeyboardInterrupt/SystemExit are no longer
        # swallowed as they were by the former bare "except:".
        return default
e4bae49e657b more renovation
casties
parents: 0
diff changeset
24
0
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
def getText(node):
    """Return all text content of an ElementTree node and its subnodes.

    node -- an ElementTree-style element (has ``.text``, ``.tail`` and
            iterates over its children), or None

    Returns the node's own text followed, for every child in document
    order, by the child's text content and the child's tail. Returns ""
    for None. The tail of ``node`` itself is not included.
    """
    if node is None:
        return ""

    parts = [node.text or ""]
    for child in node:
        # Recurse with the correct name; the previous call to "gettext"
        # raised NameError for every node that had children.
        parts.append(getText(child))
        if child.tail:
            parts.append(child.tail)

    return "".join(parts)
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
44
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
45
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
46
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
def getHttpData(url, data=None, num_tries=3, timeout=10):
    """Return the body of an HTTP GET request for ``url`` (plus ``data``).

    url       -- URL to fetch
    data      -- optional query: a string is appended as-is; a dict, list
                 or tuple is urlencoded first; anything else is ignored
    num_tries -- maximum number of attempts; a urllib2.URLError is retried,
                 a urllib2.HTTPError aborts immediately
    timeout   -- timeout in seconds for each attempt

    Raises IOError if no response could be obtained.
    """
    # we do GET (by appending data to url)
    query = None
    if isinstance(data, basestring):
        # if data is string then append
        query = data
    elif isinstance(data, (dict, list, tuple)):
        # urlencode
        query = urllib.urlencode(data)

    if query is not None:
        # pick the separator from the url so that a url that already has a
        # query string is extended instead of getting a second "?"
        if "?" not in url:
            separator = "?"
        elif url.endswith("?") or url.endswith("&"):
            separator = ""
        else:
            separator = "&"
        url = "%s%s%s"%(url,separator,query)

    response = None
    errmsg = None
    for cnt in range(num_tries):
        try:
            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
            if sys.version_info < (2, 6):
                # set timeout on socket -- ugly :-(
                # NOTE: this changes the process-wide default socket timeout
                import socket
                socket.setdefaulttimeout(float(timeout))
                response = urllib2.urlopen(url)
            else:
                # timeout as parameter
                response = urllib2.urlopen(url,timeout=float(timeout))
            # check result?
            break
        except urllib2.HTTPError:
            # exception fetched via sys.exc_info() so the clause is valid on
            # every interpreter version (this function still supports < 2.6)
            e = sys.exc_info()[1]
            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
            errmsg = str(e)
            # the server answered with an error status: stop trying
            break
        except urllib2.URLError:
            e = sys.exc_info()[1]
            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
            errmsg = str(e)
            # no break here: this kind of error is retried

    if response is not None:
        try:
            return response.read()
        finally:
            # close the connection even if read() fails
            response.close()

    raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
9f9d9be26e53 first checkin in Mercurial (see history in SVN)
casties
parents:
diff changeset
90