version 1.1, 2008/06/24 11:16:27
|
version 1.1.2.1, 2008/06/24 11:16:27
|
Line 0
|
Line 1
|
|
# Field names this module treats as defined.
definedFields = [
    'WEB_title',
    'xdata_01',
    'xdata_02',
    'xdata_03',
    'xdata_04',
    'xdata_05',
    'xdata_06',
    'xdata_07',
    'xdata_08',
    'xdata_09',
    'xdata_10',
    'xdata_11',
    'xdata_12',
    'xdata_13',
    'WEB_project_header',
    'WEB_project_description',
    'WEB_related_pub',
]

# Fields selected for checking; how they are checked is decided by code
# outside this section.
checkFields = ['xdata_01']
|
|
|
# replaces logging
def logger(txt, method, txt2):
    """Write txt followed by txt2 to the root logger at INFO level.

    txt    -- first part of the message
    method -- accepted for call compatibility; not used here
    txt2   -- second part of the message, appended directly to txt
    """
    message = txt + txt2
    logging.info(message)
|
|
|
def getTextFromNode(nodename):
    """Return the joined data of the text nodes directly below nodename.

    Only immediate children whose nodeType equals TEXT_NODE contribute;
    text nested inside child elements is not collected. An element
    without text children yields "".
    """
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
|
|
|
|
|
def sortStopWordsF(self, xo, yo):
    """Three-way compare xo and yo by their second element, ignoring stop words.

    self -- object providing ``stopwords_en.data`` (newline separated stop
            words); the split list is stored on it as ``_v_stopWords`` the
            first time it is needed
    xo   -- indexable item; ``str(xo[1])`` is the text that is compared
    yo   -- indexable item; ``str(yo[1])`` is the text that is compared

    Each text is split on single spaces, every word whose lower-cased form
    is a stop word is dropped, and the remaining words are re-joined with
    single spaces before comparing.

    Returns -1, 0 or 1, like the Python 2 builtin cmp().
    """
    if not hasattr(self, '_v_stopWords'):
        self._v_stopWords = self.stopwords_en.data.split("\n")
    stopWords = self._v_stopWords

    def withoutStopWords(value):
        # Build a new list instead of deleting from the list being iterated:
        # deleting in place skips the word that follows each removed word, so
        # consecutive stop words were not all removed.
        kept = [word for word in str(value).split(" ")
                if word.lower() not in stopWords]
        return " ".join(kept)

    left = withoutStopWords(xo[1])
    right = withoutStopWords(yo[1])
    # Same result as cmp(left, right) for strings, without needing the
    # cmp() builtin.
    return (left > right) - (left < right)
|
|
|
def sortStopWords(self):
    """Return a two-argument comparison function bound to self.

    The returned callable takes (x, y) and forwards them, together with
    self, to sortStopWordsF.
    """
    def compare(x, y):
        return sortStopWordsF(self, x, y)

    return compare
|
|
|
def sortF(x, y):
    """Three-way compare x and y by their second element.

    x, y -- indexable items; element [1] of each is compared

    The values are compared directly first. If that raises, their str()
    forms are compared instead. If that raises as well (for example
    because element [1] does not exist), the items are reported as equal.

    Returns -1, 0 or 1, like the Python 2 builtin cmp().
    """
    try:
        left, right = x[1], y[1]
        # Equivalent to cmp(left, right) without needing the cmp() builtin.
        return (left > right) - (left < right)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        try:
            left, right = str(x[1]), str(y[1])
            return (left > right) - (left < right)
        except Exception:
            return 0
|
|
|
def _dottedSortKey(dotted):
    """Return the first five dot-separated components of dotted as an int tuple."""
    parts = dotted.split(".")
    key = []
    for position in range(5):
        try:
            key.append(int(parts[position]))
        except (IndexError, ValueError):
            # Missing or non-numeric components count as 0.
            key.append(0)
    return tuple(key)


def sortI(x, y):
    """Three-way compare x and y by the dotted number in their second element.

    x, y -- indexable items whose element [1] is a string such as "1.2.10"

    Only the first five dot-separated components are considered. Each is
    compared as an integer; missing or non-numeric components count as 0,
    so "1.2" and "1.2.0" compare equal. Comparing integer tuples keeps
    components of any size in the right order, where fixed-width digit
    strings would misalign for values of 10000 or more.

    Returns -1, 0 or 1, like the Python 2 builtin cmp().
    """
    xkey = _dottedSortKey(x[1])
    ykey = _dottedSortKey(y[1])
    # Equivalent to cmp(xkey, ykey) without needing the cmp() builtin.
    return (xkey > ykey) - (xkey < ykey)
|
|
|
|
|
def unicodify(str):
    """Decode str (utf-8 or latin-1 representation) into a unicode object.

    str -- value to convert

    Falsy input returns "". A value whose type is exactly StringType is
    decoded as UTF-8, and as Latin-1 when it is not valid UTF-8. Any other
    value is returned unchanged.
    """
    if not str:
        return ""
    if type(str) is not StringType:
        return str
    try:
        return str.decode('utf-8')
    except UnicodeDecodeError:
        # Only a failed UTF-8 decode triggers the Latin-1 fallback; other
        # errors propagate.
        return str.decode('latin-1')
|
|
|
def utf8ify(str):
    """Encode a unicode object or string into a UTF-8 byte string.

    str -- value to convert

    Falsy input returns "". A value whose type is exactly StringType is
    returned as is; anything else is encoded with its encode('utf-8').
    """
    if str:
        if type(str) is not StringType:
            return str.encode('utf-8')
        return str
    return ""
|
|