version 1.175.2.11, 2011/07/29 16:27:24
|
version 1.175.2.12, 2011/07/29 18:36:04
|
Line 264 class documentViewer(Folder):
|
Line 264 class documentViewer(Folder):
|
|
|
return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) |
return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) |
|
|
|
|
security.declareProtected('View','index_html') |
security.declareProtected('View','index_html') |
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): |
def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): |
""" |
""" |
Line 293 class documentViewer(Folder):
|
Line 294 class documentViewer(Folder):
|
# auto viewMode: text_dict if text else images |
# auto viewMode: text_dict if text else images |
if viewMode=="auto": |
if viewMode=="auto": |
if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): |
if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): |
#texturl gesetzt und textViewer konfiguriert |
|
viewMode="text_dict" |
viewMode="text_dict" |
else: |
else: |
viewMode="images" |
viewMode="images" |
Line 389 class documentViewer(Folder):
|
Line 389 class documentViewer(Folder):
|
|
|
def getInfo_xml(self,url,mode): |
def getInfo_xml(self,url,mode): |
"""returns info about the document as XML""" |
"""returns info about the document as XML""" |
|
|
if not self.digilibBaseUrl: |
if not self.digilibBaseUrl: |
self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" |
self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" |
|
|
Line 397 class documentViewer(Folder):
|
Line 396 class documentViewer(Folder):
|
pt = getattr(self.template, 'info_xml') |
pt = getattr(self.template, 'info_xml') |
return pt(docinfo=docinfo) |
return pt(docinfo=docinfo) |
|
|
def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):
    """Return the current state of a toggle option kept in the session.

    The option is stored in self.REQUEST.SESSION under optionName as a dict
    with the keys 'lastState' (the newState value of the previous call) and
    'state' (the value that is returned). The first call stores and returns
    initialState; later calls flip 'state' whenever newState differs from
    the stored 'lastState'.
    """
    session = self.REQUEST.SESSION
    if session.has_key(optionName):
        option = session.get(optionName)
        if option['lastState'] != newState:
            # marker differs from the previous request -- toggle
            option['state'] = not option['state']
            option['lastState'] = newState
    else:
        # not in session yet -- start with the initial state
        option = {'lastState': newState, 'state': initialState}

    # always store the option (back) in the session
    session[optionName] = option
    return option['state']
|
|
|
def isAccessible(self, docinfo): |
def isAccessible(self, docinfo): |
"""returns if access to the resource is granted""" |
"""returns if access to the resource is granted""" |
access = docinfo.get('accessType', None) |
access = docinfo.get('accessType', None) |
logging.debug("documentViewer (accessOK) access type %s"%access) |
logging.debug("documentViewer (accessOK) access type %s"%access) |
if access is not None and access == 'free': |
if access == 'free': |
logging.debug("documentViewer (accessOK) access is free") |
logging.debug("documentViewer (accessOK) access is free") |
return True |
return True |
|
|
elif access is None or access in self.authgroups: |
elif access is None or access in self.authgroups: |
# only local access -- only logged in users |
# only local access -- only logged in users |
user = getSecurityManager().getUser() |
user = getSecurityManager().getUser() |
Line 433 class documentViewer(Folder):
|
Line 418 class documentViewer(Folder):
|
return False |
return False |
|
|
|
|
def getDirinfoFromDigilib(self, path, docinfo=None, cut=0):
    """Query digilib's dirInfo-xml.jsp for path and store the page count.

    path is shortened cut times with getParentPath() before the request.
    The text of the "size" element of the answer is converted to int and
    stored as docinfo['numPages'] (0 when the size is empty).

    Returns docinfo (a new dict when none was passed in).
    Raises IOError when getHttpData() returns None.
    """
    if docinfo is None:
        docinfo = {}

    for _step in range(cut):
        path = getParentPath(path)

    infoUrl = self.digilibBaseUrl + "/dirInfo-xml.jsp?mo=dir&fn=" + path
    logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))

    response = getHttpData(infoUrl)
    if response is None:
        raise IOError("Unable to get dir-info from %s"%(infoUrl))

    size = getText(ET.fromstring(response).find("size"))
    logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size)

    docinfo['numPages'] = int(size) if size else 0

    # TODO: produce and keep list of image names and numbers
    return docinfo
|
|
|
def getIndexMetaPath(self, url):
    """Return only the archive path contained in url.

    The path is built from the last "experimental/" or "permanent/"
    component found in url and everything that follows it, prefixed with
    "/mpiwg/online/". Returns "" when url contains neither component.
    """
    match = re.match(r".*(experimental|permanent)/(.*)", url)
    if match is None:
        return ""

    logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%match.group(2))
    return "/mpiwg/online/" + match.group(1) + "/" + match.group(2)
|
|
|
|
|
|
|
def getIndexMetaUrl(self, url):
    """Return the URL of the index.meta document at url.

    A url starting with "http://" or "https://" is a real URL and is
    returned unchanged. Anything else is treated as an online path:
    "/mpiwg/online" is removed, the result is appended to the Texter
    servlet URL below self.digilibBaseUrl, and "/index.meta" is added
    unless the URL already ends with "index.meta".
    """
    if url.startswith(("http://", "https://")):
        # real URL
        return url

    # online path
    server = self.digilibBaseUrl + "/servlet/Texter?fn="
    metaUrl = server + url.replace("/mpiwg/online", "")
    if not metaUrl.endswith("index.meta"):
        metaUrl += "/index.meta"

    return metaUrl
|
|
|
def getDomFromIndexMeta(self, url):
    """Load the index.meta document for url and return it parsed.

    The document URL comes from self.getIndexMetaUrl(url); the data read
    from it is parsed with ET.fromstring().

    Raises IOError when getHttpData() returns None.
    """
    metaUrl = self.getIndexMetaUrl(url)
    logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)

    data = getHttpData(metaUrl)
    if data is None:
        raise IOError("Unable to read index meta from %s"%(url))

    return ET.fromstring(data)
|
|
|
def getPresentationInfoXML(self, url):
    """Return the parsed info.xml document at url.

    A url starting with "http://" or "https://" is fetched as is.
    Anything else is treated as an online path: "/mpiwg/online" is removed
    and the result is requested through the Texter servlet below
    self.digilibBaseUrl.

    Raises IOError when getHttpData() returns None.
    """
    if url.startswith(("http://", "https://")):
        # real URL
        metaUrl = url
    else:
        # online path
        server = self.digilibBaseUrl + "/servlet/Texter?fn="
        metaUrl = server + url.replace("/mpiwg/online", "")

    txt = getHttpData(metaUrl)
    if txt is None:
        raise IOError("Unable to read infoXMLfrom %s"%(url))

    return ET.fromstring(txt)
|
|
|
|
|
def getAuthinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """Get authorization info from the index.meta file at path or given by dom.

    When dom is None, path is shortened cut times with getParentPath() and
    the document is loaded with self.getDomFromIndexMeta().

    docinfo['accessType'] is set to
    - None when there is no access-conditions/access element or it has no
      type attribute,
    - the lower-cased text of access-conditions/access/name when the type
      is 'group' or 'institution',
    - the type attribute itself otherwise, which includes 'group' or
      'institution' entries whose name element is missing or empty.

    Returns docinfo (a new dict when none was passed in).
    """
    logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _step in range(cut):
            path = getParentPath(path)
        dom = self.getDomFromIndexMeta(path)

    access = None
    acc = dom.find(".//access-conditions/access")
    if acc is not None:
        acctype = acc.get('type')
        if acctype:
            access = acctype
            if access in ('group', 'institution'):
                nameElem = dom.find(".//access-conditions/access/name")
                groupName = nameElem.text if nameElem is not None else None
                if groupName:
                    access = groupName.lower()
                else:
                    # keep the bare access type instead of failing on the
                    # missing name element
                    logging.warning("documentViewer (getauthinfofromindexmeta) access name missing in: %s"%(path))

    docinfo['accessType'] = access
    return docinfo
|
|
|
|
|
def getBibinfoFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """Get bibliographical info from the index.meta file at path or given by dom.

    When dom is None, path is shortened cut times and the document is
    loaded with self.getDomFromIndexMeta().

    Sets docinfo['indexMetaPath'] and, when self.metadataService is
    available, docinfo['bib'] (all raw bib fields), docinfo['bib_type'] and
    the DC-mapped 'creator', 'title' and 'date'. When the metadata service
    returns no bib data (None), only 'bib' and 'bib_type' are set (to None).

    Returns docinfo (a new dict when none was passed in).
    """
    logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))

    if docinfo is None:
        docinfo = {}

    if dom is None:
        # NOTE(review): the sibling methods shorten the path with
        # getParentPath() -- confirm that getParentDir() is intended here
        for _step in range(cut):
            path = getParentDir(path)
        dom = self.getDomFromIndexMeta(path)

    docinfo['indexMetaPath'] = self.getIndexMetaPath(path)

    logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
    if self.metadataService is None:
        logging.error("MetadataService not found!")
        return docinfo

    # put all raw bib fields in dict "bib"
    bib = self.metadataService.getBibData(dom=dom)
    docinfo['bib'] = bib
    if bib is None:
        # no bib data -- there is nothing to read a type or DC fields from
        docinfo['bib_type'] = None
        return docinfo

    docinfo['bib_type'] = bib.get('@type', None)
    # also store DC metadata for convenience
    dc = self.metadataService.getDCMappedData(bib)
    docinfo['creator'] = dc.get('creator', None)
    docinfo['title'] = dc.get('title', None)
    docinfo['date'] = dc.get('date', None)
    return docinfo
|
|
|
|
|
# TODO: is this needed? |
|
def getNameFromIndexMeta(self, path, docinfo=None, dom=None, cut=0):
    """Get name info from the index.meta file at path or given by dom.

    When dom is None, path is shortened cut times with getParentPath() and
    the document is loaded with self.getDomFromIndexMeta(). The text of the
    "name" element is stored as docinfo['name'].

    Returns docinfo (a new dict when none was passed in).
    """
    if docinfo is None:
        docinfo = {}

    if dom is None:
        for _step in range(cut):
            path = getParentPath(path)
        dom = self.getDomFromIndexMeta(path)

    name = getText(dom.find("name"))
    docinfo['name'] = name
    logging.debug("documentViewer docinfo[name] %s"%name)
    return docinfo
|
|
|
|
|
def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
    """Parse the texttool data of an index.meta document into docinfo.

    url is the location of the index.meta document; dom is its parsed
    content and is loaded with self.getDomFromIndexMeta(url) when None.

    Keys written to docinfo, depending on what the document contains:
    'lang' (defaults to ''), 'imagePath', 'imageURL', 'viewerURL',
    'textURL', 'textURLPath', 'textURLPathkurz', plus everything added by
    getDirinfoFromDigilib(), getBibinfoFromIndexMeta(),
    getNameFromIndexMeta(), getBibinfoFromTextToolPresentation() and
    getAuthinfoFromIndexMeta().

    Returns docinfo (a new dict when none was passed in).
    Raises IOError when no archive path could be determined.
    """
    logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
    if docinfo is None:
        docinfo = {}
    if docinfo.get('lang', None) is None:
        docinfo['lang'] = ''  # default: no language set
    if dom is None:
        dom = self.getDomFromIndexMeta(url)

    # NOTE(review): other methods read self.metadataService -- confirm that
    # self.metadata is the intended attribute here
    texttool = self.metadata.getTexttoolData(dom=dom)

    archiveName = getText(dom.find("name"))
    if not archiveName:
        logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))

    archivePath = getText(dom.find("archive-path"))
    if archivePath:
        # clean up archive path
        if archivePath[0] != '/':
            archivePath = '/' + archivePath
        if archiveName and (not archivePath.endswith(archiveName)):
            archivePath += "/" + archiveName
    else:
        # try to get archive-path from url
        logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
        if not url.startswith('http'):
            archivePath = url.replace('index.meta', '')

    if archivePath is None:
        # we balk without archive-path
        raise IOError("Missing archive-path (for text-tool) in %s" % (url))

    imageDir = texttool.get('image', None)
    if not imageDir:
        # a missing image tag is no longer an error because text mode is
        # the standard now
        imageDir = ""
        docinfo['imagePath'] = ""  # no images
        docinfo['imageURL'] = ""

    if imageDir and archivePath:
        imageDir = os.path.join(archivePath, imageDir)
        imageDir = imageDir.replace("/mpiwg/online", '')
        docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
        docinfo['imagePath'] = imageDir
        docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir

    viewerUrl = texttool.get('digiliburlprefix', None)
    if viewerUrl:
        docinfo['viewerURL'] = viewerUrl

    # old style text URL
    textUrl = texttool.get('text', None)
    if textUrl:
        if urlparse.urlparse(textUrl)[0] == "":
            # no scheme, i.e. not a URL -- relative to the archive path
            textUrl = os.path.join(archivePath, textUrl)
        # fix URLs starting with /mpiwg/online
        if textUrl.startswith("/mpiwg/online"):
            textUrl = textUrl.replace("/mpiwg/online", '', 1)

        docinfo['textURL'] = textUrl

    # new style text-url-path
    textUrl = texttool.get('text-url-path', None)
    if textUrl:
        docinfo['textURLPath'] = textUrl
        # short form: everything before the first "."
        docinfo['textURLPathkurz'] = textUrl.split(".")[0]

    # get bib info (from the bib tag)
    docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)
    # TODO: is this needed here?
    docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)

    # TODO: what to do with presentation?
    presentationPath = texttool.get('presentation', None)
    if presentationPath:
        # override the bib info with the presentation information; the
        # presentation URL is the metadata URL with index.meta replaced by
        # the relative path to the presentation info
        if url.endswith("index.meta"):
            presentationUrl = url.replace('index.meta', presentationPath)
        else:
            presentationUrl = url + "/" + presentationPath

        docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)

    # get authorization (access info)
    docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)

    return docinfo
|
|
|
|
|
def getBibinfoFromTextToolPresentation(self, url, docinfo=None, dom=None):
    """Get bibliographical information from the presentation entry in texttools.

    The presentation document is loaded with self.getPresentationInfoXML(url);
    the dom argument is not used. The texts of the first author, title and
    date elements are stored as docinfo['author'], docinfo['title'] and
    docinfo['year'].

    Returns docinfo (a new dict when none was passed in).
    """
    if docinfo is None:
        docinfo = {}

    dom = self.getPresentationInfoXML(url)
    docinfo['author'] = getText(dom.find(".//author"))
    docinfo['title'] = getText(dom.find(".//title"))
    docinfo['year'] = getText(dom.find(".//date"))
    return docinfo
|
|
|
def getDocinfoFromImagePath(self, path, docinfo=None, cut=0):
    """Build docinfo for a folder of images.

    path is the path to the images; the index.meta file is assumed to be
    one level higher, which is why bib and auth info are read with cut+1.
    "/mpiwg/online" is removed from path before it is used.

    Sets docinfo['imagePath'] and docinfo['imageURL'] and adds whatever
    getDirinfoFromDigilib(), getBibinfoFromIndexMeta() and
    getAuthinfoFromIndexMeta() write. Returns docinfo (a new dict when
    none was passed in).
    """
    logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
    if docinfo is None:
        docinfo = {}

    imagePath = path.replace("/mpiwg/online", "")
    docinfo['imagePath'] = imagePath
    docinfo = self.getDirinfoFromDigilib(imagePath, docinfo=docinfo, cut=cut)

    # the Scaler URL points at the path shortened cut times
    scalerPath = imagePath
    for _step in range(cut):
        scalerPath = getParentPath(scalerPath)
    logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+scalerPath)
    docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + scalerPath

    #TODO: use getDocinfoFromIndexMeta
    metaCut = cut + 1
    docinfo = self.getBibinfoFromIndexMeta(imagePath, docinfo=docinfo, cut=metaCut)
    docinfo = self.getAuthinfoFromIndexMeta(imagePath, docinfo=docinfo, cut=metaCut)
    return docinfo
|
|
|
|
|
def OLDgetDocinfo(self, mode, url):
    """Return docinfo depending on mode, cached in the session.

    A docinfo stored in self.REQUEST.SESSION['docinfo'] is reused when its
    'mode' and 'url' match the arguments. Otherwise a new docinfo is built:
    - 'texttool': index.meta with texttool information
    - 'imagepath': folder with images, index.meta optional
    - 'filepath': file name (image path with cut=1)
    and stored in the session.

    Raises ValueError for any other mode.
    """
    logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
    session = self.REQUEST.SESSION

    # look for cached docinfo in session
    if session.has_key('docinfo'):
        cached = session['docinfo']
        # check if its still current
        if cached is not None and cached.get('mode') == mode and cached.get('url') == url:
            logging.debug("documentViewer (getdocinfo) docinfo in session. keys=%s"%cached.keys())
            return cached

    # new docinfo including the viewer's own url
    docinfo = {'mode': mode, 'url': url}
    docinfo['viewerUrl'] = self.getDocumentViewerURL()

    if mode == "texttool":
        docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
    elif mode == "imagepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
    elif mode == "filepath":
        docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
    else:
        logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
        raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))

    logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys())
    # store in session
    session['docinfo'] = docinfo
    return docinfo
|
|
|
|
|
def getDocinfo(self, mode, url): |
def getDocinfo(self, mode, url): |
"""returns docinfo depending on mode""" |
"""returns docinfo depending on mode""" |
Line 829 class documentViewer(Folder):
|
Line 477 class documentViewer(Folder):
|
bib = self.metadataService.getBibData(dom=metaDom) |
bib = self.metadataService.getBibData(dom=metaDom) |
if bib: |
if bib: |
docinfo = self.getDocinfoFromBib(docinfo, bib) |
docinfo = self.getDocinfoFromBib(docinfo, bib) |
|
else: |
|
# no bib - try info.xml |
|
docinfo = self.getDocinfoFromPresentationInfoXml(docinfo) |
|
|
# auth info |
# auth info |
access = self.metadataService.getAccessData(dom=metaDom) |
access = self.metadataService.getAccessData(dom=metaDom) |
Line 838 class documentViewer(Folder):
|
Line 489 class documentViewer(Folder):
|
# image path |
# image path |
if mode != 'texttool': |
if mode != 'texttool': |
# override image path from texttool |
# override image path from texttool |
docinfo['imagePath'] = url |
docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1) |
|
|
# number of images from digilib |
# number of images from digilib |
if docinfo.get('imagePath', None): |
if docinfo.get('imagePath', None): |
Line 869 class documentViewer(Folder):
|
Line 520 class documentViewer(Folder):
|
docUrl = docinfo['documentURL'] |
docUrl = docinfo['documentURL'] |
if not docUrl.startswith('http:'): |
if not docUrl.startswith('http:'): |
docPath = docUrl |
docPath = docUrl |
|
if docPath: |
|
# fix URLs starting with /mpiwg/online |
|
docPath = docPath.replace('/mpiwg/online', '', 1) |
|
|
docinfo['documentPath'] = docPath |
docinfo['documentPath'] = docPath |
return docinfo |
return docinfo |
Line 889 class documentViewer(Folder):
|
Line 543 class documentViewer(Folder):
|
if textUrl and docPath: |
if textUrl and docPath: |
if urlparse.urlparse(textUrl)[0] == "": #keine url |
if urlparse.urlparse(textUrl)[0] == "": #keine url |
textUrl = os.path.join(docPath, textUrl) |
textUrl = os.path.join(docPath, textUrl) |
# fix URLs starting with /mpiwg/online |
|
textUrl = textUrl.replace('/mpiwg/online', '', 1) |
|
|
|
docinfo['textURL'] = textUrl |
docinfo['textURL'] = textUrl |
|
|
Line 905 class documentViewer(Folder):
|
Line 557 class documentViewer(Folder):
|
# old presentation stuff |
# old presentation stuff |
presentation = texttool.get('presentation', None) |
presentation = texttool.get('presentation', None) |
if presentation and docPath: |
if presentation and docPath: |
docinfo['presentationPath'] = os.path.join(docPath, presentation) |
if presentation.startswith('http:'): |
|
docinfo['presentationUrl'] = presentation |
|
else: |
|
docinfo['presentationUrl'] = os.path.join(docPath, presentation) |
|
|
return docinfo |
return docinfo |
|
|
def getDocinfoFromBib(self, docinfo, bib): |
def getDocinfoFromBib(self, docinfo, bib): |
"""reads contents of bib element into docinfo""" |
"""reads contents of bib element into docinfo""" |
|
logging.debug("getDocinfoFromBib bib=%s"%repr(bib)) |
# put all raw bib fields in dict "bib" |
# put all raw bib fields in dict "bib" |
docinfo['bib'] = bib |
docinfo['bib'] = bib |
bibtype = bib.get('@type', None) |
bibtype = bib.get('@type', None) |
Line 925 class documentViewer(Folder):
|
Line 581 class documentViewer(Folder):
|
def getDocinfoFromAccess(self, docinfo, acc): |
def getDocinfoFromAccess(self, docinfo, acc): |
"""reads contents of access element into docinfo""" |
"""reads contents of access element into docinfo""" |
#TODO: also read resource type |
#TODO: also read resource type |
|
logging.debug("getDocinfoFromAccess acc=%s"%repr(acc)) |
try: |
try: |
acctype = accc['@attr']['type'] |
acctype = acc['@attr']['type'] |
if acctype: |
if acctype: |
access=acctype |
access=acctype |
if access in ['group', 'institution']: |
if access in ['group', 'institution']: |
Line 959 class documentViewer(Folder):
|
Line 616 class documentViewer(Folder):
|
return docinfo |
return docinfo |
|
|
|
|
|
def getDocinfoFromPresentationInfoXml(self, docinfo):
    """Get DC-like bibliographical information from the presentation entry in texttools.

    Reads the document named by docinfo['presentationUrl'] and stores the
    texts of its first author, title and date elements as
    docinfo['creator'], docinfo['title'] and docinfo['date'].

    When there is no presentation URL or the document cannot be read, an
    error is logged and docinfo is returned unchanged.
    """
    url = docinfo.get('presentationUrl', None)
    if not url:
        logging.error("getDocinfoFromPresentation: no URL!")
        return docinfo

    if url.startswith("http://"):
        # real URL
        metaUrl = url
    else:
        # online path -- fetched through the Texter servlet
        metaUrl = self.digilibBaseUrl + "/servlet/Texter?fn=" + url

    txt = getHttpData(metaUrl)
    if txt is None:
        logging.error("Unable to read info.xml from %s"%(url))
        return docinfo

    dom = ET.fromstring(txt)
    for key, xpath in (('creator', ".//author"), ('title', ".//title"), ('date', ".//date")):
        docinfo[key] = getText(dom.find(xpath))
    return docinfo
|
|
|
|
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): |
def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): |
"""returns pageinfo with the given parameters""" |
"""returns pageinfo with the given parameters""" |
pageinfo = {} |
pageinfo = {} |