version 1.175.2.10, 2011/07/28 13:00:07
|
version 1.175.2.11, 2011/07/29 16:27:24
|
Line 98 def browserCheck(self):
|
Line 98 def browserCheck(self):
|
|
|
return bt |
return bt |
|
|
def getParentDir(path):
    """Return path with its last '/'-separated segment removed.

    Earlier single-level variant, kept for call sites that still use it.
    Unlike getParentPath it does not strip a trailing '/' first, so
    '/a/b/' becomes '/a/b'.
    """
    return '/'.join(path.split('/')[0:-1])


def getParentPath(path, cnt=1):
    """Return path shortened by cnt trailing '/'-separated segments.

    A trailing '/' is stripped before shortening, so '/a/b/c/' and
    '/a/b/c' give the same result.

    @param path: '/'-separated path name
    @param cnt: number of segments to remove (default 1); values <= 0
        return the path unshortened
    """
    # make sure path doesn't end with /
    path = path.rstrip('/')
    if cnt <= 0:
        # a slice ending at -0 is empty, which would drop the whole path
        return path

    # split by /, shorten, and reassemble
    return '/'.join(path.split('/')[0:-cnt])
|
|
|
|
## |
## |
Line 263 class documentViewer(Folder):
|
Line 266 class documentViewer(Folder):
|
|
|
security.declareProtected('View','index_html') |
security.declareProtected('View','index_html') |
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): |
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): |
''' |
""" |
view it |
view it |
@param mode: defines how to access the document behind url |
@param mode: defines how to access the document behind url |
@param url: url which contains display information |
@param url: url which contains display information |
@param viewMode: if images display images, if text display text, default is auto (text,images or auto) |
@param viewMode: if images display images, if text display text, default is auto (text,images or auto) |
@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) |
@param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) |
@param characterNormalization type of text display (reg, norm, none) |
""" |
@param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) |
|
''' |
|
|
|
logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
|
|
Line 370 class documentViewer(Folder):
|
Line 371 class documentViewer(Folder):
|
# FIXME: does this belong here? |
# FIXME: does this belong here? |
if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath |
if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath |
urlParams["mode"] = "imagepath" |
urlParams["mode"] = "imagepath" |
urlParams["url"] = getParentDir(urlParams["url"]) |
urlParams["url"] = getParentPath(urlParams["url"]) |
|
|
# quote values and assemble into query string (not escaping '/') |
# quote values and assemble into query string (not escaping '/') |
ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) |
ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) |
Line 438 class documentViewer(Folder):
|
Line 439 class documentViewer(Folder):
|
docinfo = {} |
docinfo = {} |
|
|
for x in range(cut): |
for x in range(cut): |
path=getParentDir(path) |
path=getParentPath(path) |
|
|
infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path |
infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path |
|
|
Line 536 class documentViewer(Folder):
|
Line 537 class documentViewer(Folder):
|
|
|
if dom is None: |
if dom is None: |
for x in range(cut): |
for x in range(cut): |
path=getParentDir(path) |
path=getParentPath(path) |
dom = self.getDomFromIndexMeta(path) |
dom = self.getDomFromIndexMeta(path) |
|
|
acc = dom.find(".//access-conditions/access") |
acc = dom.find(".//access-conditions/access") |
Line 591 class documentViewer(Folder):
|
Line 592 class documentViewer(Folder):
|
|
|
if dom is None: |
if dom is None: |
for x in range(cut): |
for x in range(cut): |
path=getParentDir(path) |
path=getParentPath(path) |
dom = self.getDomFromIndexMeta(path) |
dom = self.getDomFromIndexMeta(path) |
|
|
docinfo['name']=getText(dom.find("name")) |
docinfo['name']=getText(dom.find("name")) |
Line 723 class documentViewer(Folder):
|
Line 724 class documentViewer(Folder):
|
|
|
pathorig=path |
pathorig=path |
for x in range(cut): |
for x in range(cut): |
path=getParentDir(path) |
path=getParentPath(path) |
logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) |
logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) |
imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path |
imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path |
docinfo['imageURL'] = imageUrl |
docinfo['imageURL'] = imageUrl |
Line 735 class documentViewer(Folder):
|
Line 736 class documentViewer(Folder):
|
return docinfo |
return docinfo |
|
|
|
|
def getDocinfo(self, mode, url): |
def OLDgetDocinfo(self, mode, url): |
"""returns docinfo depending on mode""" |
"""returns docinfo depending on mode""" |
logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) |
logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) |
# look for cached docinfo in session |
# look for cached docinfo in session |
Line 769 class documentViewer(Folder):
|
Line 770 class documentViewer(Folder):
|
self.REQUEST.SESSION['docinfo'] = docinfo |
self.REQUEST.SESSION['docinfo'] = docinfo |
return docinfo |
return docinfo |
|
|
|
|
|
def getDocinfo(self, mode, url):
    """Return the docinfo dict for url, interpreted according to mode.

    A docinfo stored in the session is returned as-is when it was built
    for the same mode and url. Otherwise a new one is assembled from
    index.meta (via self.metadataService) and digilib, stored in the
    session and returned.

    @param mode: 'texttool' (url is a document dir or index.meta),
        'imagepath' (url is an image folder, index.meta looked up one
        level up) or 'filepath' (url is an image file, index.meta looked
        up two levels up)
    @param url: location of the document, meaning depends on mode
    @raise IOError: no index.meta could be read in mode 'texttool'
    @raise ValueError: mode is not one of the three known modes
    """
    logging.debug("getDocinfo: mode=%s, url=%s", mode, url)

    # reuse the session copy if it was built for the same mode and url
    session = self.REQUEST.SESSION
    if session.has_key('docinfo'):
        cached = session['docinfo']
        isCurrent = (cached is not None
                     and cached.get('mode', None) == mode
                     and cached.get('url', None) == url)
        if isCurrent:
            logging.debug("getDocinfo: docinfo in session. keys=%s", cached.keys())
            return cached

    # start a fresh docinfo, including the viewer's own url
    docinfo = {
        'mode': mode,
        'url': url,
        'viewerUrl': self.getDocumentViewerURL(),
    }

    # locate the document and try to read its index.meta
    if mode == "texttool":
        # index.meta is mandatory in this mode
        metaDom = self.metadataService.getDomFromPathOrUrl(url)
        docUrl = url.replace('/index.meta', '')
        if metaDom is None:
            raise IOError("Unable to find index.meta for mode=texttool!")

    elif mode == "imagepath":
        # index.meta is optional; assume it lives in the parent dir
        docUrl = getParentPath(url)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    elif mode == "filepath":
        # index.meta is optional; assume it lives two path segments up
        docUrl = getParentPath(url, 2)
        metaDom = self.metadataService.getDomFromPathOrUrl(docUrl)

    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!", mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    docinfo['documentUrl'] = docUrl

    # merge the index.meta sections in fixed order:
    # resource, texttool, bib, access
    if metaDom is not None:
        service = self.metadataService
        sections = (
            (service.getResourceData, self.getDocinfoFromResource),
            (service.getTexttoolData, self.getDocinfoFromTexttool),
            (service.getBibData, self.getDocinfoFromBib),
            (service.getAccessData, self.getDocinfoFromAccess),
        )
        for readSection, mergeSection in sections:
            sectionData = readSection(dom=metaDom)
            if sectionData:
                docinfo = mergeSection(docinfo, sectionData)

    # outside texttool mode the url itself overrides any image path
    # taken from the texttool section
    if mode != 'texttool':
        docinfo['imagePath'] = url

    # image url and number of images from digilib
    imagePath = docinfo.get('imagePath', None)
    if imagePath:
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imagePath
        docinfo = self.getDocinfoFromDigilib(docinfo, imagePath)

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s", docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
|
|
|
def getDocinfoFromResource(self, docinfo, resource):
    """Read name and path of the resource element into docinfo.

    Sets docinfo['documentName'] to the resource name and
    docinfo['documentPath'] to the cleaned-up archive-path. When the
    resource has no archive-path, the document url already stored in
    docinfo is used as the path, unless it is an http(s) URL.

    @param docinfo: dict to update (modified in place)
    @param resource: dict-like resource data with optional 'name' and
        'archive-path' entries
    @return: the updated docinfo
    """
    docName = resource.get('name', None)
    docinfo['documentName'] = docName
    docPath = resource.get('archive-path', None)
    if docPath:
        # clean up document path: make it absolute and end in the name
        if not docPath.startswith('/'):
            docPath = '/' + docPath

        if docName and (not docPath.endswith(docName)):
            docPath += "/" + docName

    else:
        # use docUrl as docPath; getDocinfo stores it under 'documentUrl',
        # the 'documentURL' spelling is still accepted as a fallback
        docUrl = docinfo.get('documentUrl', None) or docinfo.get('documentURL', None)
        if docUrl and not docUrl.startswith(('http:', 'https:')):
            docPath = docUrl

    docinfo['documentPath'] = docPath
    return docinfo
|
|
|
def getDocinfoFromTexttool(self, docinfo, texttool):
    """Read the contents of the texttool element into docinfo.

    Depending on which entries texttool provides, sets 'imagePath',
    'textURL', 'textURLPath' and 'presentationPath'. All entries except
    'text-url-path' are resolved against docinfo['documentPath'] and are
    skipped when that path is missing.

    @param docinfo: dict to update (modified in place)
    @param texttool: dict-like texttool data
    @return: the updated docinfo
    """
    docPath = docinfo.get('documentPath', None)

    # image dir, relative to the document path
    imageDir = texttool.get('image', None)
    if imageDir and docPath:
        fullImageDir = os.path.join(docPath, imageDir)
        docinfo['imagePath'] = fullImageDir.replace('/mpiwg/online', '', 1)

    # old style text URL
    # NOTE(review): nesting reconstructed from an unindented listing --
    # confirm that only scheme-less values are joined and stripped
    oldTextUrl = texttool.get('text', None)
    if oldTextUrl and docPath:
        scheme = urlparse.urlparse(oldTextUrl)[0]
        if scheme == "":
            # not a URL: resolve against the document path and
            # fix paths starting with /mpiwg/online
            oldTextUrl = os.path.join(docPath, oldTextUrl)
            oldTextUrl = oldTextUrl.replace('/mpiwg/online', '', 1)
        docinfo['textURL'] = oldTextUrl

    # new style text-url-path is stored unchanged
    textUrlPath = texttool.get('text-url-path', None)
    if textUrlPath:
        docinfo['textURLPath'] = textUrlPath

    # old presentation stuff
    presentation = texttool.get('presentation', None)
    if presentation and docPath:
        docinfo['presentationPath'] = os.path.join(docPath, presentation)

    return docinfo
|
|
|
def getDocinfoFromBib(self, docinfo, bib):
    """Read the contents of the bib element into docinfo.

    Stores the raw bib dict under 'bib', its '@type' entry under
    'bibType', and the 'creator', 'title' and 'date' values of the
    DC-mapped data returned by self.metadataService.getDCMappedData
    (None where a value is missing).

    @param docinfo: dict to update (modified in place)
    @param bib: dict-like bib data
    @return: the updated docinfo
    """
    # keep all raw bib fields together with the bib type
    docinfo['bib'] = bib
    docinfo['bibType'] = bib.get('@type', None)

    # also store selected DC metadata for convenience
    dcData = self.metadataService.getDCMappedData(bib)
    for field in ('creator', 'title', 'date'):
        docinfo[field] = dcData.get(field, None)

    return docinfo
|
|
|
def getDocinfoFromAccess(self, docinfo, acc):
    """Read the contents of the access element into docinfo.

    Sets docinfo['accessType'] to the access type found at
    acc['@attr']['type']. For the types 'group' and 'institution' the
    lower-cased acc['name'] is stored instead. Reading is best-effort:
    if the type is empty or any of the entries is missing, docinfo is
    returned unchanged.

    @param docinfo: dict to update (modified in place)
    @param acc: dict-like access data
    @return: the updated docinfo
    """
    #TODO: also read resource type
    try:
        acctype = acc['@attr']['type']
        if acctype:
            access = acctype
            if access in ('group', 'institution'):
                # the group or institution name is the effective type
                access = acc['name'].lower()

            docinfo['accessType'] = access

    except (KeyError, TypeError, AttributeError):
        # missing or malformed access data: leave accessType unset
        logging.debug("getDocinfoFromAccess: incomplete access data, accessType not set")

    return docinfo
|
|
|
def getDocinfoFromDigilib(self, docinfo, path):
    """Read the number of pages for path from digilib into docinfo.

    Fetches dirInfo-xml.jsp from self.digilibBaseUrl for the directory
    path and stores the integer value of its 'size' element in
    docinfo['numPages'] (0 if the element has no text). If no data
    could be fetched, an error is logged and docinfo is returned
    unchanged.

    @param docinfo: dict to update (modified in place)
    @param path: directory path passed to digilib as 'fn'
    @return: the updated docinfo
    """
    infoUrl = "".join([self.digilibBaseUrl, "/dirInfo-xml.jsp?mo=dir&fn=", path])

    # fetch data
    response = getHttpData(infoUrl)
    if response:
        root = ET.fromstring(response)
        size = getText(root.find("size"))
        logging.debug("getDocinfoFromDigilib: size=%s", size)
        docinfo['numPages'] = int(size) if size else 0
    else:
        logging.error("Unable to get dir-info from %s", infoUrl)

    # TODO: produce and keep list of image names and numbers
    return docinfo
|
|
|
|
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): |
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): |
"""returns pageinfo with the given parameters""" |
"""returns pageinfo with the given parameters""" |
pageinfo = {} |
pageinfo = {} |