version 1.238.2.8, 2011/08/05 09:24:42
|
version 1.238.2.10, 2011/08/05 17:04:20
|
Line 244 class MpdlXmlTextServer(SimpleItem):
|
Line 244 class MpdlXmlTextServer(SimpleItem):
|
elif dc == 'pageNumberOrigNorm': |
elif dc == 'pageNumberOrigNorm': |
pageinfo['pageNumberOrigNorm'] = div.text |
pageinfo['pageNumberOrigNorm'] = div.text |
|
|
# pageNumberOrigNorm |
# pageHeaderTitle |
|
elif dc == 'pageHeaderTitle': |
|
pageinfo['pageHeaderTitle'] = div.text |
|
|
|
# numFigureEntries |
elif dc == 'countFigureEntries': |
elif dc == 'countFigureEntries': |
docinfo['numFigureEntries'] = getInt(div.text) |
docinfo['numFigureEntries'] = getInt(div.text) |
|
|
# pageNumberOrigNorm |
# numTocEntries |
elif dc == 'countTocEntries': |
elif dc == 'countTocEntries': |
# WTF: s1 = int(s)/30+1 |
# WTF: s1 = int(s)/30+1 |
docinfo['numTocEntries'] = getInt(div.text) |
docinfo['numTocEntries'] = getInt(div.text) |
|
|
# pageHeaderTitle |
# numPlaces |
elif dc == 'pageHeaderTitle': |
elif dc == 'countPlaces': |
docinfo['pageHeaderTitle'] = div.text |
docinfo['numPlaces'] = getInt(div.text) |
|
|
# numTextPages |
# numTextPages |
elif dc == 'countPages': |
elif dc == 'countPages': |
Line 302 class MpdlXmlTextServer(SimpleItem):
|
Line 306 class MpdlXmlTextServer(SimpleItem):
|
textmode = "textPollux" |
textmode = "textPollux" |
elif not mode: |
elif not mode: |
# default is text |
# default is text |
|
mode = "text" |
textmode = "text" |
textmode = "text" |
else: |
else: |
textmode = mode |
textmode = mode |
Line 340 class MpdlXmlTextServer(SimpleItem):
|
Line 345 class MpdlXmlTextServer(SimpleItem):
|
return serialize(pagediv) |
return serialize(pagediv) |
|
|
# text-with-links mode |
# text-with-links mode |
elif mode == "text_dict": |
elif mode == "dict": |
if pagediv is not None: |
if pagediv is not None: |
# check all a-tags |
# check all a-tags |
links = pagediv.findall(".//a") |
links = pagediv.findall(".//a") |