version 1.54, 2010/10/05 12:13:59
|
version 1.55, 2010/10/05 12:56:02
|
Line 130 class MpdlXmlTextServer(SimpleItem):
|
Line 130 class MpdlXmlTextServer(SimpleItem):
|
|
|
def getNumTextPages (self, docinfo): |
def getNumTextPages (self, docinfo): |
"""get list of pages from fulltext (texts without images) and put in docinfo""" |
"""get list of pages from fulltext (texts without images) and put in docinfo""" |
if 'numPages' in docinfo: |
# if 'numPages' in docinfo: |
# allredy there |
# allredy there |
return docinfo |
# return docinfo |
xpath ='/count(//pb)' |
# xpath ='/count(//pb)' |
text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'], xpath)) |
# text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'], xpath)) |
logging.debug("documentViewer (text) text: %s"%(text)) |
# logging.debug("documentViewer (text) text: %s"%(text)) |
#docinfo['numPages']=text('resultPage') |
#docinfo['numPages']=text('resultPage') |
dom = Parse(text) |
# dom = Parse(text) |
|
|
result= dom.xpath("//result/resultPage") |
# result= dom.xpath("//result/resultPage") |
docinfo['numPages']=int(getTextFromNode(result[0])) |
# docinfo['numPages']=int(getTextFromNode(result[0])) |
#logging.debug("documentViewer (characterNormalization) docinfo['numPages']: %s"%(docinfo['numPages'])) |
#logging.debug("documentViewer (characterNormalization) docinfo['numPages']: %s"%(docinfo['numPages'])) |
return docinfo |
return docinfo |
|
|