Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: documentViewer/HocrTxtUtils.py

Last change on this file was 617:7aefbddddaf9, checked in by dwinter, 10 years ago
alpha of hocr server support
File size: 4.5 KB

Line
1	"""Utility methods for handling XML, reading HTTP, etc"""
2
3	from App.ImageFile import ImageFile
4	from App.Common import rfc1123_date
5
6	import sys
7	import os
8	import stat
9	import urllib
10	import urllib2
11	import logging
12
13
14	HocrTxtUtilsVersion = "0.1"
15
def getInt(number, default=0):
    """Return ``number`` converted to an int, or ``int(default)`` on failure.

    number -- any value accepted by int() (e.g. a numeric string).
    default -- fallback used when ``number`` cannot be converted; it is
        itself passed through int(), so an unconvertible default raises.
    """
    try:
        return int(number)
    except (TypeError, ValueError, OverflowError):
        # only conversion failures count as "problems"; a bare except would
        # also swallow KeyboardInterrupt and SystemExit
        return int(default)
22
def getAt(array, idx, default=None):
    """Return ``array[idx]``, or ``default`` if the lookup fails.

    array -- any subscriptable object (sequence or mapping); a
        non-subscriptable value such as None also yields ``default``.
    idx -- index or key to look up.
    default -- value returned when the element is not available.
    """
    try:
        return array[idx]
    except (LookupError, TypeError):
        # LookupError covers IndexError and KeyError; TypeError covers
        # non-subscriptable containers and unsuitable index types
        return default
29
def unicodify(s):
    """Decode ``s`` into a unicode object.

    Byte strings are decoded as UTF-8, falling back to latin-1 when they are
    not valid UTF-8. Any other object is converted with unicode(). Falsy
    input (None, empty string, 0, ...) yields the empty unicode string.
    """
    if not s:
        return u""
    if isinstance(s, str):
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # not valid UTF-8; latin-1 maps every byte value, so this
            # fallback decode cannot fail
            return s.decode('latin-1')
    return unicode(s)
41
def utf8ify(s):
    """Return ``s`` as a byte string in UTF-8 representation.

    Unicode objects are encoded as UTF-8; everything else is passed through
    str(), i.e. byte strings are assumed to be UTF-8 already. Falsy input
    yields the empty string.
    """
    if s:
        if isinstance(s, unicode):
            return s.encode('utf-8')
        return str(s)
    return ""
51
def getText(node, recursive=0):
    """Return the text content of an ElementTree node.

    node -- an ElementTree element, or None (which yields '').
    recursive -- if true, include the text of all descendants; otherwise
        only the node's own text plus the text and tail of its direct
        children.

    Returns the concatenated text as a string.
    """
    if node is None:
        return ''

    parts = [node.text or '']
    for child in node:
        if recursive:
            # pass the flag on, otherwise the descent stops one level down
            parts.append(getText(child, recursive=recursive))
        else:
            parts.append(child.text or '')
        # the tail is the text that follows the child's closing tag
        if child.tail:
            parts.append(child.tail)

    return ''.join(parts)
75
76
77
78	def getHttpData(url, pn=1,data=None, num_tries=3, timeout=10, noExceptions=False):
79	"""returns result from url+data HTTP request"""
80	# we do GET (by appending data to url)
81	if isinstance(data, str) or isinstance(data, unicode):
82	# if data is string then append
83	url = "%s?pn=%s&%s"%(url,pn,data)
84	elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
85	# urlencode
86	url = "%s?pn=%s&%s"%(url,pn,urllib.urlencode(data))
87
88	response = None
89	errmsg = None
90	for cnt in range(num_tries):
91	try:
92	logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
93	if sys.version_info < (2, 6):
94	# set timeout on socket -- ugly :-(
95	import socket
96	socket.setdefaulttimeout(float(timeout))
97	response = urllib2.urlopen(url)
98	else:
99	# timeout as parameter
100	response = urllib2.urlopen(url,timeout=float(timeout))
101	# check result?
102	break
103	except urllib2.HTTPError, e:
104	logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
105	errmsg = str(e)
106	# stop trying
107	break
108	except urllib2.URLError, e:
109	logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
110	errmsg = str(e)
111	# stop trying
112	#break
113
114	if response is not None:
115	data = response.read()
116	response.close()
117	return data
118
119	if noExceptions:
120	return None
121
122	raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
123	#return None
124
125
def refreshingImageFileIndexHtml(self, REQUEST, RESPONSE):
    """index_html method for App.ImageFile that updates the file info for each request.

    Re-reads size and modification time of ``self.path`` from the file
    system, stores them in ``self.size``, ``self.lmt`` and ``self.lmh``, and
    then delegates to the original ImageFile.index_html.
    """
    # time is not among this module's top-level imports, so the fallback
    # below would otherwise fail with a NameError
    import time

    stat_info = os.stat(self.path)
    self.size = stat_info[stat.ST_SIZE]
    # fall back to the current time if the file reports an mtime of 0
    self.lmt = float(stat_info[stat.ST_MTIME]) or time.time()
    self.lmh = rfc1123_date(self.lmt)
    # call original method
    return ImageFile.index_html(self, REQUEST, RESPONSE)
134
135
def getBrowserType(self):
    """Check the request's User-Agent header to find out the browser type.

    Reads the "HTTP_USER_AGENT" header from ``self.REQUEST`` and returns a
    dict with the keys:
      'ua'         -- the raw header value as returned by the request
      'isIE'       -- True if the user agent contains "MSIE"
      'isN4'       -- True if it is not IE and contains "Mozilla/4."
      'versIE'     -- text following "MSIE " in the second "; "-separated
                      field of the parenthesised part (only set if found)
      'isMac'      -- True if the user agent contains "Macintosh"
      'isWin'      -- True if the user agent contains "Windows"
      'isIEWin'    -- isIE and isWin
      'isIEMac'    -- isIE and isMac
      'staticHTML' -- always False
    """
    raw_ua = self.REQUEST.get_header("HTTP_USER_AGENT")
    # a missing header is treated as an empty user agent for all checks
    ua = raw_ua or ''

    bt = {}
    bt['ua'] = raw_ua
    bt['isIE'] = False
    bt['isN4'] = False
    # plain str operations are used because the string module is not
    # imported by this module
    if 'MSIE' in ua:
        bt['isIE'] = True
    else:
        bt['isN4'] = 'Mozilla/4.' in ua

    try:
        nav = ua[ua.find('('):]
        ie = nav.split("; ")[1]
        if 'MSIE' in ie:
            bt['versIE'] = ie.split(" ")[1]
    except IndexError:
        # user agent does not have the expected "(...; MSIE x.y; ...)" shape
        pass

    bt['isMac'] = 'Macintosh' in ua
    bt['isWin'] = 'Windows' in ua
    bt['isIEWin'] = bt['isIE'] and bt['isWin']
    bt['isIEMac'] = bt['isIE'] and bt['isMac']
    bt['staticHTML'] = False

    return bt
162
163

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: documentViewer/HocrTxtUtils.py

Download in other formats: