from types import *
import logging

# Three-way comparison used by the sort helpers below.  Python 2 ships a
# ``cmp`` builtin; interpreters without it get an equivalent shim.
try:
    _cmp = cmp
except NameError:
    def _cmp(a, b):
        """Return -1, 0 or 1 depending on whether a <, == or > b."""
        return (a > b) - (a < b)


definedFields = [
    'WEB_title',
    'xdata_01',
    'xdata_02',
    'xdata_03',
    'xdata_04',
    'xdata_05',
    'xdata_06',
    'xdata_07',
    'xdata_08',
    'xdata_09',
    'xdata_10',
    'xdata_11',
    'xdata_12',
    'xdata_13',
    'WEB_project_header',
    'WEB_project_description',
    'WEB_related_pub',
]

checkFields = ['xdata_01']


# replaces logging
def logger(txt, method, txt2):
    """Log the concatenation of *txt* and *txt2* at INFO level.

    *method* is accepted for call compatibility but is not used.
    """
    logging.info(txt + txt2)


def getTextFromNode(nodename):
    """Return the concatenated data of the direct text children of a node.

    Only children whose ``nodeType`` equals their ``TEXT_NODE`` are used;
    other children are skipped and not descended into.  Returns ``""`` when
    there are no text children.
    """
    return "".join([node.data
                    for node in nodename.childNodes
                    if node.nodeType == node.TEXT_NODE])


def _withoutStopWords(text, stopWords):
    """Return *text* with every space-separated stop word removed."""
    return " ".join([word for word in text.split(" ")
                     if word.lower() not in stopWords])


def sortStopWordsF(self, xo, yo):
    """cmp-style comparison of ``xo[1]`` and ``yo[1]`` ignoring stop words.

    The stop word list is read once from ``self.stopwords_en.data`` (one
    word per line) and cached on ``self._v_stopWords``.  Both values are
    converted with ``str()``, split on single spaces, stripped of words
    whose lower-cased form is in the stop list, re-joined and compared.

    Returns a negative number, zero or a positive number.
    """
    if not hasattr(self, '_v_stopWords'):
        self._v_stopWords = self.stopwords_en.data.split("\n")
    stopWords = self._v_stopWords

    # Build filtered copies instead of deleting while iterating: deleting
    # in place skipped the word following every removed stop word.
    left = _withoutStopWords(str(xo[1]), stopWords)
    right = _withoutStopWords(str(yo[1]), stopWords)
    return _cmp(left, right)


def sortStopWords(self):
    """Return a two-argument cmp-style function bound to *self*.

    The returned callable forwards to :func:`sortStopWordsF`.
    """
    return lambda x, y: sortStopWordsF(self, x, y)


def sortF(x, y):
    """cmp-style comparison of ``x[1]`` and ``y[1]``.

    If the direct comparison fails, the ``str()`` forms are compared; if
    that fails as well the items are treated as equal (returns 0).
    """
    try:
        return _cmp(x[1], y[1])
    except Exception:
        try:
            return _cmp(str(x[1]), str(y[1]))
        except Exception:
            return 0


def _dottedKey(dotted, width=5):
    """Return the first *width* dot-separated components of *dotted* as ints.

    Missing or non-numeric components count as 0.
    """
    parts = dotted.split(".")
    key = []
    for i in range(width):
        try:
            key.append(int(parts[i]))
        except (IndexError, ValueError):
            key.append(0)
    return key


def sortI(x, y):
    """cmp-style comparison of dotted numbers such as ``"1.2.10"``.

    ``x[1]`` and ``y[1]`` are split on ``"."`` and the first five components
    are compared numerically, component by component.  Missing or
    non-numeric components count as 0.

    Components are compared as integers rather than as concatenated
    zero-padded fields, so values above 9999 and negative values order
    correctly.
    """
    return _cmp(_dottedKey(x[1]), _dottedKey(y[1]))


def unicodify(str):
    """decode str (utf-8 or latin-1 representation) into unicode object

    Falsy input yields ``""``.  Byte strings are decoded as UTF-8, falling
    back to latin-1 when they are not valid UTF-8.  Anything else is
    returned unchanged.
    """
    # NOTE: the parameter name shadows the builtin but is kept because it is
    # part of the public signature.
    if not str:
        return ""
    if isinstance(str, bytes):
        try:
            return str.decode('utf-8')
        except UnicodeDecodeError:
            return str.decode('latin-1')
    return str


def utf8ify(str):
    """encode unicode object or string into byte string in utf-8 representation

    Falsy input yields ``""``.  Byte strings are returned unchanged; any
    other value is encoded with its ``encode('utf-8')`` method.
    """
    if not str:
        return ""
    if isinstance(str, bytes):
        return str
    return str.encode('utf-8')