version 1.1.2.32, 2012/01/03 09:26:45
|
version 1.1.2.34, 2012/01/09 07:33:30
|
Line 21 from bibliography import *
|
Line 21 from bibliography import *
|
import time |
import time |
import xml.dom.minidom |
import xml.dom.minidom |
import sys |
import sys |
from Ft.Xml.XPath import Evaluate |
#from Ft.Xml.XPath import Evaluate |
from Ft.Xml.XPath.Context import Context |
#from Ft.Xml.XPath.Context import Context |
from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print |
#from Ft.Xml.Domlette import NonvalidatingReader,PrettyPrint, Print |
from Ft.Xml import EMPTY_NAMESPACE |
#from Ft.Xml import EMPTY_NAMESPACE |
import copy |
import copy |
import updatePersonalWWW |
import updatePersonalWWW |
import MPIWGStaff |
import MPIWGStaff |
from MPIWGHelper import * |
from MPIWGHelper import * |
|
from BeautifulSoup import BeautifulSoup, Comment |
|
|
def sortWeight(x,y): |
def sortWeight(x,y): |
x1=int(getattr(x[1],'weight','0')) |
x1=int(getattr(x[1],'weight','0')) |
Line 119 class MPIWGRoot(ZSQLExtendFolder):
|
Line 119 class MPIWGRoot(ZSQLExtendFolder):
|
#if not words is ListType: |
#if not words is ListType: |
# words=[words] |
# words=[words] |
|
|
txt=obj.harvest_page() |
txt=obj.harvest_page(mode="slim") |
if not txt: |
if not txt: |
return ret |
return ret |
txt=re.sub("<.*?>", "", txt) # loesche alle Tags |
|
|
soup = BeautifulSoup(txt) |
|
|
|
comments = soup.findAll(text=lambda text:isinstance(text, Comment)) |
|
[comment.extract() for comment in comments] |
|
|
|
txt = ''.join(soup.findAll(text=True)) |
|
|
|
|
|
#txt=re.sub("<.*?>", "", txt) # loesche alle Tags |
for word in words: |
for word in words: |
word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " |
word=re.sub("_"," ",word) # ersetze zurueck "_" durch " " |
pos=0 |
pos=0 |
Line 144 class MPIWGRoot(ZSQLExtendFolder):
|
Line 153 class MPIWGRoot(ZSQLExtendFolder):
|
y=max(ranges[nr][1],y) |
y=max(ranges[nr][1],y) |
|
|
str=txt[x:y] |
str=txt[x:y] |
|
if x!=0: #add dots if in the middle of text |
|
str="..."+str |
|
|
|
if y!=len(txt): #add dots if in the middle of text |
|
str=str+"..." |
|
|
|
|
|
|
if nr >=0: # word ist in einer schon gefunden Umgebung |
if nr >=0: # word ist in einer schon gefunden Umgebung |
ranges[nr]=(x,y) # neue Position der Umgebung |
ranges[nr]=(x,y) # neue Position der Umgebung |
Line 1504 class MPIWGRoot(ZSQLExtendFolder):
|
Line 1520 class MPIWGRoot(ZSQLExtendFolder):
|
return "done" |
return "done" |
|
|
|
|
|
def sortResults(self,results):
    """Group search results by their meta_type.

    results -- a callable taking no arguments that returns an iterable of
               result objects; each object must expose a ``meta_type``
               attribute.

    Returns a dictionary that maps every meta_type occurring in the results
    to the list of result objects carrying it, in the order in which they
    were delivered. An empty result set yields an empty dictionary.
    """
    # Invoke results() exactly once and reuse the value for both the debug
    # output and the loop, so the underlying search is not run a second time
    # merely for logging.
    found = results()
    logging.debug(found)

    ret = {}
    for result in found:
        # setdefault creates the list the first time a meta_type is seen
        ret.setdefault(result.meta_type, []).append(result)

    logging.debug(ret)
    return ret
|
|
|
|
def manage_addMPIWGRootForm(self): |
def manage_addMPIWGRootForm(self): |
"""form for adding the root""" |
"""form for adding the root""" |