version 1.3, 2003/10/01 08:20:47
|
version 1.35, 2008/01/18 20:36:28
|
Line 1
|
Line 1
|
# Classes for displaying, browsing and organizing the archive |
""" Classes for displaying, browsing and organizing the archive |
|
20040303 Needs configuration for rescaling thumbs |
|
|
|
""" |
|
|
import addFolder |
import addFolder |
|
import OSAS_helpers |
from OFS.Folder import Folder |
from OFS.Folder import Folder |
from OFS.SimpleItem import SimpleItem |
from OFS.SimpleItem import SimpleItem |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from Products.PageTemplates.PageTemplate import PageTemplate |
from Products.PageTemplates.PageTemplate import PageTemplate |
|
from AccessControl import ClassSecurityInfo |
|
from Globals import InitializeClass,package_home |
|
import zLOG |
|
|
class OSAS_ShowOnline(SimpleItem): |
class OSAS_ShowOnline(SimpleItem): |
|
security=ClassSecurityInfo() |
|
|
"""OnlineBrowser""" |
"""OnlineBrowser""" |
def __init__(self,id): |
def __init__(self,id): |
"""initialize a new instance""" |
"""initialize a new instance""" |
Line 14 class OSAS_ShowOnline(SimpleItem):
|
Line 23 class OSAS_ShowOnline(SimpleItem):
|
|
|
meta_type="OSAS_ShowOnline" |
meta_type="OSAS_ShowOnline" |
|
|
|
security.declareProtected('View','index_html') |
def index_html(self):
    """Render the main view.

    Loads ``zpt/OSAS_ViewFiles.zpt`` from this product's package directory,
    binds it to ``self`` via ``__of__`` and returns the rendered page.
    """
    # Resolve the template relative to the installed package instead of the
    # working-directory-relative 'products/OSA_system/...' path.
    template = os.path.join(package_home(globals()), 'zpt', 'OSAS_ViewFiles.zpt')
    pt = PageTemplateFile(template).__of__(self)
    return pt()
|
|
def getfilesystem2(self,start,reload=0): |
def getfilesystem2(self,start,reload=0): |
Line 45 class OSAS_ShowOnline(SimpleItem):
|
Line 55 class OSAS_ShowOnline(SimpleItem):
|
return hasMetafile(path) |
return hasMetafile(path) |
|
|
def getMetafile(self, path):
    """Return the HTML rendering of ``path``'s index.meta as a unicode string.

    Delegates to the module-level ``getMetafile(path)``, which returns a
    UTF-8 encoded string, and decodes that result.
    """
    return getMetafile(path).decode('utf-8')
|
|
def toggle_view(self, path, file):
    """Open or close a subfolder in the tree view, then redirect back.

    Calls ``toggle(path, file)`` on the tree returned by ``self.tree(path)``
    and returns the result of redirecting to ``URL1`` of the current request
    with ``?path=<path>`` appended.
    """
    self.tree(path).toggle(path, file)
    target = self.REQUEST['URL1'] + "?path=" + path
    return self.REQUEST.RESPONSE.redirect(target)
|
|
|
|
|
InitializeClass(OSAS_ShowOnline) |
|
|
def manage_AddOSAS_ShowOnlineForm(self):
    """Render the form used to add an OSAS_ShowOnline object.

    Loads ``zpt/AddOSAS_ShowOnline.zpt`` from this product's package
    directory, binds it to ``self`` and returns the rendered page.
    """
    template = os.path.join(package_home(globals()), 'zpt', 'AddOSAS_ShowOnline.zpt')
    pt = PageTemplateFile(template).__of__(self)
    return pt()
|
|
def manage_AddOSAS_ShowOnline(self,id,RESPONSE=None): |
def manage_AddOSAS_ShowOnline(self,id,RESPONSE=None): |
Line 67 def manage_AddOSAS_ShowOnline(self,id,RE
|
Line 81 def manage_AddOSAS_ShowOnline(self,id,RE
|
|
|
class OSAS_StoreOnline(SimpleItem): |
class OSAS_StoreOnline(SimpleItem): |
"""Webfrontend für das Storagesystem""" |
"""Webfrontend für das Storagesystem""" |
|
security=ClassSecurityInfo() |
|
|
def __init__(self, id):
    """Initialize a new instance and store ``id`` as the object's id."""
    self.id = id
|
|
meta_type="OSAS_StoreOnline" |
meta_type="OSAS_StoreOnline" |
|
|
|
security.declareProtected('View','index_html') |
def index_html(self):
    """Render the main view of the storage front end.

    Loads ``zpt/OSAS_StoreFiles.zpt`` from this product's package directory,
    binds it to ``self`` and returns the rendered page.
    """
    # Resolve the template relative to the installed package instead of the
    # working-directory-relative 'products/OSA_system/...' path.
    template = os.path.join(package_home(globals()), 'zpt', 'OSAS_StoreFiles.zpt')
    pt = PageTemplateFile(template).__of__(self)
    return pt()
|
|
|
def readContexts(self, path):
    """Return the contexts recorded in ``path``'s index.meta.

    Returns the result of the module-level ``readContexts(path)`` when
    ``path + "/index.meta"`` exists, otherwise an empty list.
    """
    if not os.path.exists(path + "/index.meta"):
        return []
    # Inside a method body this name resolves to the module-level function,
    # not to this method.
    return readContexts(path)
|
|
|
def rescaleThumbs(self, path):
    """Rescale the thumbnails of the images below the requested path.

    Starts the remote ``scaleomat`` script over ssh in the background, logs
    the command output and the command itself, stores the request path in the
    session and returns the rendered ``zpt/OSAS_scaled.zpt`` page.

    The directory is read from ``self.REQUEST['path']``; the ``path``
    argument is part of the signature but is not read here.
    """
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2

    request_path = self.REQUEST['path']
    # Strip the archive root so the script gets a path relative to it.
    dlpath = re.sub('/mpiwg/online/', '', request_path)

    # The directory comes from the web request. It is quoted once for the
    # remote shell, and the whole remote command is quoted again for the local
    # shell started by os.popen, so it cannot inject further commands.
    remote_cmd = ("/usr/local/mpiwg/scripts/scaleomat -src=/mpiwg/online "
                  "-dest=/mpiwg/temp/online/scaled/thumb -dir=%s -scaleto=100 -sync"
                  % quote(dlpath))
    command = ("ssh archive@nausikaa2.rz-berlin.mpg.de %s >> /tmp/sc.out &"
               % quote(remote_cmd))

    ret = os.popen(command)
    try:
        output = ret.read()
    finally:
        ret.close()

    zLOG.LOG("OSAS_StoreOnline (rescleThumbs)", zLOG.INFO, output)
    zLOG.LOG("OSAS_StoreOnline (rescleThumbs)", zLOG.INFO, command)

    self.REQUEST.SESSION['path'] = request_path

    template = os.path.join(package_home(globals()), 'zpt', 'OSAS_scaled.zpt')
    pt = PageTemplateFile(template).__of__(self)
    return pt()
|
|
|
|
def getfilesystem2(self,start,reload=0): |
def getfilesystem2(self,start,reload=0): |
"""load filesystem""" |
"""load filesystem""" |
|
|
Line 104 class OSAS_StoreOnline(SimpleItem):
|
Line 145 class OSAS_StoreOnline(SimpleItem):
|
return hasMetafile(path) |
return hasMetafile(path) |
|
|
def getMetafile(self, path):
    """Return the HTML rendering of ``path``'s index.meta as a unicode string.

    Delegates to the module-level ``getMetafile(path)``, which returns a
    UTF-8 encoded string, and decodes that result.
    """
    return getMetafile(path).decode('utf-8')
|
|
def toggle_view(self,path,file): |
def toggle_view(self,path,file): |
"""Oeffnen bzw. schließen der Subfolders""" |
"""Oeffnen bzw. schließen der Subfolders""" |
Line 116 class OSAS_StoreOnline(SimpleItem):
|
Line 158 class OSAS_StoreOnline(SimpleItem):
|
return isFolder(self,path) |
return isFolder(self,path) |
|
|
def isScannedDocument(self, path):
    """Test whether the entry at ``path`` is a scanned document.

    Delegates to the module-level ``isScannedDocument(self, path)`` and
    returns its result unchanged.
    """
    return isScannedDocument(self, path)
|
|
def isFullText(self, path, folder_name):
    """Delegate to the module-level ``isFullText(path, folder_name)``.

    NOTE(review): the original docstring said "test whether the entry is a
    folder", which looks copy-pasted; presumably this tests whether
    ``folder_name`` below ``path`` holds full text -- confirm against the
    module-level function.
    """
    return isFullText(path, folder_name)
|
|
|
def isPdf(self, path, folder_name):
    """Test whether ``folder_name`` below ``path`` is a pdf folder.

    Delegates to the module-level ``isPdf(path, folder_name)`` and returns
    its result unchanged.
    """
    return isPdf(path, folder_name)
|
|
|
|
|
def isPresentation(self, path, folder_name):
    """Delegate to the module-level ``isPresentation(path, folder_name)``.

    NOTE(review): the original docstring said "test whether the entry is a
    folder", which looks copy-pasted; presumably this tests whether
    ``folder_name`` below ``path`` is a presentation folder -- confirm.
    """
    return isPresentation(path, folder_name)
|
|
|
def date(self):
    """Return the current local date formatted as ``DD.MM.YYYY``."""
    return strftime("%d.%m.%Y", localtime())
|
|
def addFolderForm(self, path):
    """Render the form for adding a new folder.

    Loads ``zpt/OSAS_addFolder.zpt`` from this product's package directory,
    binds it to ``self`` and returns the rendered page. ``path`` is not read
    here; presumably the template picks it up from the request -- confirm.
    """
    template = os.path.join(package_home(globals()), 'zpt', 'OSAS_addFolder.zpt')
    pt = PageTemplateFile(template).__of__(self)
    return pt()
|
|
def showHelp(self, refType, genericTag):
    """Return the help text for ``genericTag`` of reference type ``refType``.

    Searches the objects found by ``self.ZopeFind(self.standardMD)`` for the
    first one whose ``title`` equals ``refType`` and returns element ``[2]``
    of its ``md_<genericTag>`` attribute. Returns ``"NO EXPLANATION"`` when
    no object has that title.
    """
    for reference in self.ZopeFind(self.standardMD):
        # ZopeFind results are indexed here as (id, object) pairs.
        entry = reference[1]
        if entry.title == refType:
            return getattr(entry, 'md_' + genericTag)[2]
    return "NO EXPLANATION"
|
|
|
def showHelpTag(self, url, reftype, item):
    """Build the ``javascript:`` link that opens the help popup.

    The popup loads ``<url>/showHelp?refType=<reftype>&genericTag=<item>`` in
    a 300x250 window named ``Help``. The values are inserted verbatim, with
    no URL or JavaScript escaping.
    """
    help_url = url + '/showHelp?refType=%s&genericTag=%s' % (reftype, item)
    return ("javascript:wd=window.open('%s','Help','width=300,height=250');"
            "void('');wd.focus();" % help_url)
|
|
def addFolder(self,path,folder_name,description,archive_creation_date,creator): |
def addFolder(self,path,folder_name,description,archive_creation_date,creator): |
"""add the folder to the filesystem and write the metadata files""" |
"""add the folder to the filesystem and write the metadata files""" |
Line 139 class OSAS_StoreOnline(SimpleItem):
|
Line 204 class OSAS_StoreOnline(SimpleItem):
|
"""Editiere das Index Metafile""" |
"""Editiere das Index Metafile""" |
try: |
try: |
dom=xml.dom.minidom.parse(path+"/index.meta") |
dom=xml.dom.minidom.parse(path+"/index.meta") |
indexmeta=dom.toxml() |
indexmeta=dom.toxml(encoding='UTF-8') |
except: |
except: |
indexmeta="" |
indexmeta="" |
|
|
self.REQUEST.SESSION['indexmeta']=indexmeta |
self.REQUEST.SESSION['indexmeta']=indexmeta |
self.REQUEST.SESSION['path']=path |
self.REQUEST.SESSION['path']=path |
newtemplate=PageTemplateFile('products/OSA_system/editindex').__of__(self) |
newtemplate=PageTemplateFile(os.path.join(package_home(globals()),'zpt','editindex')).__of__(self) |
return newtemplate() |
return newtemplate() |
|
|
def EditIndex2(self): |
def EditIndex2(self): |
Line 165 class OSAS_StoreOnline(SimpleItem):
|
Line 231 class OSAS_StoreOnline(SimpleItem):
|
|
|
def add_metafile(self):
    """Render the form for adding metadata.

    Loads ``zpt/OSAS_addmetadata.zpt`` from this product's package directory,
    binds it to ``self`` and returns the rendered page.
    """
    template = os.path.join(package_home(globals()), 'zpt', 'OSAS_addmetadata.zpt')
    pt = PageTemplateFile(template).__of__(self)
    return pt()
|
|
|
def getTextToolsField(self, path, name, default=''):
    """Read a texttool field from the index.meta file in ``path``.

    Looks up the first ``<name>`` element inside the first ``<texttool>``
    element and returns its text. For ``name == "text"`` only the
    second-to-last ``/``-separated component of the value is returned (the
    folder). Returns ``default`` if the file cannot be read or parsed, or if
    the element is missing.
    """
    try:
        dom = xml.dom.minidom.parse(path + "/index.meta")
        texttool = dom.getElementsByTagName('texttool')[0]
        field = texttool.getElementsByTagName(name)[0]
        value = getText(field.childNodes)

        if name == "text":
            # Only the folder is needed for the text field.
            parts = value.split("/")
            # Keep the explicit index: for a value without "/" it is -1 and
            # yields the value itself, whereas parts[-2] would raise.
            return parts[len(parts) - 2]
        return value
    except Exception:
        # Best-effort lookup: any failure falls back to the default, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return default
|
|
|
def getViewerTemplateSets(self, obj_ids=None):
    """Return the viewer template sets available for configuration.

    Searches ``self.viewerTemplateSets`` with ``ZopeFind`` for objects of
    meta type ``OSAS_viewerTemplateSet`` (restricted to ``obj_ids`` when
    given) and returns a list of ``(title, id, object)`` tuples. If the
    lookup fails, returns the one-element placeholder list
    ``[('no ViewerTemplateSetfolders', '')]``.
    """
    try:
        # Assumes a viewerTemplateSets folder is reachable from self.
        found = self.ZopeFind(self.viewerTemplateSets,
                              obj_metatypes=['OSAS_viewerTemplateSet'],
                              obj_ids=obj_ids)
        return [(entry[1].title, entry[0], entry[1]) for entry in found]
    except Exception:
        # Best-effort: a missing folder yields the placeholder entry, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return [('no ViewerTemplateSetfolders', '')]
|
|
|
def changeTemplatesForm(self, path):
    """Render the form for changing templates.

    Stores ``path`` in the session under ``'path'``, then loads
    ``zpt/changeTemplatesForm.zpt`` from this product's package directory,
    binds it to ``self`` and returns the rendered page.
    """
    self.REQUEST.SESSION['path'] = path
    template = os.path.join(package_home(globals()), 'zpt', 'changeTemplatesForm.zpt')
    pt = PageTemplateFile(template).__of__(self)
    return pt()
|
|
|
|
|
|
|
def changeTemplates(self, path, RESPONSE=None):
    """Change all index.meta entries below ``path``.

    Walks the directory tree at ``path`` and calls
    ``OSAS_helpers.changeIndexMeta`` for each directory, passing the submitted
    form as the extra argument. If ``RESPONSE`` is given, redirects to
    ``URL1`` of the current request with the session's ``'path'`` as the
    ``path`` query parameter.
    """
    # os.path.walk is the Python 2 callback-style walker this module relies on.
    os.path.walk(path, OSAS_helpers.changeIndexMeta, self.REQUEST.form)

    if RESPONSE is not None:
        RESPONSE.redirect(self.REQUEST['URL1'] + "?path=" + self.REQUEST.SESSION['path'])
|
|
|
|
|
InitializeClass(OSAS_StoreOnline) |
|
|
def manage_AddOSAS_StoreOnlineForm(self):
    """Render the form used to add an OSAS_StoreOnline object.

    Loads ``zpt/AddOSAS_StoreOnline.zpt`` from this product's package
    directory, binds it to ``self`` and returns the rendered page.
    """
    template = os.path.join(package_home(globals()), 'zpt', 'AddOSAS_StoreOnline.zpt')
    pt = PageTemplateFile(template).__of__(self)
    return pt()
|
|
def manage_AddOSAS_StoreOnline(self,id,RESPONSE=None): |
def manage_AddOSAS_StoreOnline(self,id,RESPONSE=None): |
Line 183 def manage_AddOSAS_StoreOnline(self,id,R
|
Line 298 def manage_AddOSAS_StoreOnline(self,id,R
|
|
|
|
|
|
|
|
def readContexts(path):
    """Read the contexts from the index.meta file in ``path``.

    Returns a list of ``(link, name)`` tuples, one for each ``<context>``
    element that has both a ``<link>`` and a ``<name>`` child. Contexts that
    cannot be read are skipped. Errors from opening or parsing the file are
    not caught here.
    """
    dom = xml.dom.minidom.parse(path + "/index.meta")

    ret = []
    for node in dom.getElementsByTagName('context'):
        try:
            link = getText(node.getElementsByTagName('link')[0].childNodes)
            name = getText(node.getElementsByTagName('name')[0].childNodes)
        except Exception:
            # Best-effort: ignore incomplete context entries, without
            # swallowing KeyboardInterrupt/SystemExit.
            continue
        ret.append((link, name))
    return ret
|
|
|
|
### Ab hier Baustelle |
### Ab hier Baustelle |
|
|
|
|
Line 195 from AccessControl import ClassSecurityI
|
Line 327 from AccessControl import ClassSecurityI
|
from AccessControl.Role import RoleManager |
from AccessControl.Role import RoleManager |
from Acquisition import Implicit |
from Acquisition import Implicit |
from Globals import Persistent |
from Globals import Persistent |
|
try: |
from time import strptime |
from time import strptime |
|
except: |
|
print "ignoring time.strptime import" |
|
|
from time import strftime |
from time import strftime |
import time |
import time |
import os.path |
import os.path |
Line 204 import xml.dom.minidom
|
Line 340 import xml.dom.minidom
|
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from Products.PageTemplates.PageTemplate import PageTemplate |
from Products.PageTemplates.PageTemplate import PageTemplate |
import tempfile |
import tempfile |
# Default directory used by the tempfile module for files it creates.
tempfile.tempdir = "/var/tmp"

# Entry names that filterExcluded() drops from directory listings.
exclusion = [".HSResource", "lost+found", "Network Trash Folder", "TheFindByContentFolder", "TheVolumeSettingsFolder"]
|
|
class fsentry(Implicit, Persistent, RoleManager): |
class fsentry(Implicit, Persistent, RoleManager): |
"""File entry class""" |
"""File entry class""" |
path = "" |
path = "" |
Line 326 class filesystem(Implicit, Persistent, R
|
Line 463 class filesystem(Implicit, Persistent, R
|
|
|
def archive_the_path(self,path): |
def archive_the_path(self,path): |
"""parse indexmeta and return digilib path""" |
"""parse indexmeta and return digilib path""" |
|
|
try: |
try: |
#f = os.popen("cat "+path+"/index.meta","r") |
#f = os.popen("cat "+path+"/index.meta","r") |
f =file(path+"/index.meta","r") |
f =file(path+"/index.meta","r") |
Line 334 class filesystem(Implicit, Persistent, R
|
Line 472 class filesystem(Implicit, Persistent, R
|
|
|
try: |
try: |
dom = xml.dom.minidom.parseString(lines) |
dom = xml.dom.minidom.parseString(lines) |
|
if dom.getElementsByTagName("content-type"): |
if getText(dom.getElementsByTagName("content-type")[0].childNodes)=="folder": |
if getText(dom.getElementsByTagName("content-type")[0].childNodes)=="folder": |
"""folder nicht archivieren""" |
"""folder nicht archivieren""" |
return 0 |
return 0 |
else: |
|
archive_storage_date=getText(dom.getElementsByTagName("archive-storage-date")[0].childNodes) |
archive_storage_date=getText(dom.getElementsByTagName("archive-storage-date")[0].childNodes) |
|
|
if archive_storage_date=="": |
if archive_storage_date=="": |
Line 426 class filesystem2(Implicit, Persistent,
|
Line 565 class filesystem2(Implicit, Persistent,
|
|
|
def getfs(self,start): |
def getfs(self,start): |
"""load filessystem""" |
"""load filessystem""" |
|
|
f = os.popen("find "+ start+" -name '*' ","r") |
f = os.popen("find "+ start+" -name '*' ","r") |
lines = f.readlines() |
lines = f.readlines() |
|
|
Line 434 class filesystem2(Implicit, Persistent,
|
Line 574 class filesystem2(Implicit, Persistent,
|
def loadfs(self,start): |
def loadfs(self,start): |
"""analyse filesystem""" |
"""analyse filesystem""" |
for line in self.getfs(start): |
for line in self.getfs(start): |
|
logging.debug("loadfs: reading %s"%repr(line)) |
g=re.search(r"(.*/)(.*)\n",line) |
g=re.search(r"(.*/)(.*)\n",line) |
if not g==None: |
if not g==None: |
try: |
try: |
Line 442 class filesystem2(Implicit, Persistent,
|
Line 582 class filesystem2(Implicit, Persistent,
|
file=g.group(2) |
file=g.group(2) |
except: |
except: |
"""nothing""" |
"""nothing""" |
|
logging.debug("loadfs: file=%s"%repr(file)) |
|
#if file[0] in ('.',':'): |
|
# skip dot-files |
|
# continue |
if self.node.has_key(path): |
if self.node.has_key(path): |
elements=self.node[path] |
elements=self.node[path] |
elements.append(file) |
elements.append(file) |
Line 512 class browse(Implicit, Persistent, RoleM
|
Line 656 class browse(Implicit, Persistent, RoleM
|
def filterExcluded(self, dir):
    """Return the entries of ``dir`` that should be shown.

    Drops every name listed in the module-level ``exclusion`` list and every
    name starting with ``.`` or ``:``. The order of the remaining entries is
    preserved.
    """
    # startswith() with a tuple also copes with an empty name, where
    # indexing item[0] would raise IndexError.
    return [item for item in dir
            if item not in exclusion and not item.startswith(('.', ':'))]
return ret |
|
|
Line 561 class browse(Implicit, Persistent, RoleM
|
Line 705 class browse(Implicit, Persistent, RoleM
|
|
|
def getfs(start):
    """Return the file system below ``start`` as the output lines of ``find``.

    Runs ``find <start> -name '*'`` and returns its standard output as a list
    of lines, each still ending in its newline. ``start`` itself is the first
    entry when it exists.
    """
    import subprocess

    # An argument list avoids the shell, so a start path with spaces or shell
    # metacharacters is passed to find as a single literal argument.
    proc = subprocess.Popen(["find", start, "-name", "*"],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        lines = proc.stdout.readlines()
    finally:
        proc.stdout.close()
        proc.wait()
    return lines
Line 826 def getBib(nodelist):
|
Line 972 def getBib(nodelist):
|
|
|
if node.nodeType == node.ELEMENT_NODE: |
if node.nodeType == node.ELEMENT_NODE: |
"""nothing""" |
"""nothing""" |
|
|
rc = rc+"<tr><td valign='right'>"+str(node.nodeName)+":</td><td> "+getText(node.childNodes)+"</td></tr>" |
rc = rc+"<tr><td valign='right'>"+str(node.nodeName)+":</td><td> "+getText(node.childNodes)+"</td></tr>" |
|
|
#print rc |
#print rc |
return rc+"</table>" |
return rc+"</table>" |
|
|
def getMetafile(path): |
def getMetafile(path): |
"""get index.meta""" |
"""get index.meta and translate it to an HTML""" |
html=[] |
html=[] |
if not os.path.exists(path+"/index.meta"): |
if not os.path.exists(path+"/index.meta"): |
|
|
Line 840 def getMetafile(path):
|
Line 988 def getMetafile(path):
|
f = os.popen("cat "+path+"/index.meta","r") |
f = os.popen("cat "+path+"/index.meta","r") |
lines = f.read() |
lines = f.read() |
dom = xml.dom.minidom.parseString(lines) |
dom = xml.dom.minidom.parseString(lines) |
|
try: |
name=getText(dom.getElementsByTagName("name")[0].childNodes) |
name=getText(dom.getElementsByTagName("name")[0].childNodes) |
|
except: |
|
name="NOT_DEFINED!!!" |
|
try: |
creator=getText(dom.getElementsByTagName("creator")[0].childNodes) |
creator=getText(dom.getElementsByTagName("creator")[0].childNodes) |
|
except: |
|
creator="NOT_DEFINED!!!" |
|
|
|
try: |
creation_date=getText(dom.getElementsByTagName("archive-creation-date")[0].childNodes) |
creation_date=getText(dom.getElementsByTagName("archive-creation-date")[0].childNodes) |
|
except: |
|
creation_date="NOT_DEFINED!!!" |
|
|
|
try: |
description=getText(dom.getElementsByTagName("description")[0].childNodes) |
description=getText(dom.getElementsByTagName("description")[0].childNodes) |
|
except: |
|
description="NOT_DEFINED!!!" |
|
|
try: |
try: |
type=getText(dom.getElementsByTagName("content-type")[0].childNodes) |
type=getText(dom.getElementsByTagName("content-type")[0].childNodes) |
except: |
except: |
Line 870 def getMetafile(path):
|
Line 1033 def getMetafile(path):
|
|
|
# html=html.encode('utf-8','replace')+getBib(bib.childNodes).encode('utf-8','replace') |
# html=html.encode('utf-8','replace')+getBib(bib.childNodes).encode('utf-8','replace') |
|
|
return html |
return html.encode('utf-8') |
|
|
def hasMetafile(path):
    """Return whether ``path`` contains an ``index.meta`` file."""
    return os.path.exists(path + "/index.meta")
|
|
|
|
|
|
def isdigilib2(path): |
def isdigilib2(path): |
"""check if folder is candidate for digilib without metadata""" |
"""check if folder is candidate for digilib without metadata""" |
try: |
try: |
Line 926 def isFullText(path,folder_name):
|
Line 1091 def isFullText(path,folder_name):
|
|
|
return 0 |
return 0 |
|
|
|
def isPdf(path, folder_name):
    """Check whether ``folder_name`` is registered as a pdf folder in ``path``.

    Parses ``path + "/index.meta"`` and returns 1 if a ``<dir>`` element has a
    ``content-type`` of ``pdf`` (case-insensitive) and a ``name`` equal to
    ``folder_name``; otherwise returns 0. Any error while reading or
    evaluating the file also yields 0, including a ``<dir>`` element that
    lacks one of the two children.
    """
    try:
        dom = xml.dom.minidom.parse(path + "/index.meta")
        for node in dom.getElementsByTagName("dir"):
            content_type = getText(node.getElementsByTagName("content-type")[0].childNodes)
            if content_type.lower() != "pdf":
                continue
            if getText(node.getElementsByTagName("name")[0].childNodes) == folder_name:
                return 1
        return 0
    except Exception:
        # Best-effort check, without swallowing KeyboardInterrupt/SystemExit.
        return 0
|
|
def isPresentation(path,folder_name): |
def isPresentation(path,folder_name): |
"""check if foldername in path is full text""" |
"""check if foldername in path is full text""" |
Line 1081 def isScannedDocument(self,path):
|
Line 1260 def isScannedDocument(self,path):
|
"""returns TRUE, wenn path der Stammordner eines gescannten Documents ist""" |
"""returns TRUE, wenn path der Stammordner eines gescannten Documents ist""" |
try: |
try: |
f = file(path+"/index.meta","r") |
f = file(path+"/index.meta","r") |
|
logging.debug("isscanneddocument: file=%s"%f) |
lines = f.read() |
lines = f.read() |
|
|
try: |
try: |
dom = xml.dom.minidom.parseString(lines) |
dom = xml.dom.minidom.parseString(lines) |
content_type=getText(dom.getElementsByTagName("content-type")[0].childNodes) |
content_type=getText(dom.getElementsByTagName("content-type")[0].childNodes) |
if (content_type=="scanned-document") or (content_type=="scanned document"): |
logging.debug("isscanneddocument: content_type=%s"%content_type) |
|
if (content_type=="scanned-document") or (content_type=="scanned document") or (content_type=="fulltext document"): |
return 1 |
return 1 |
else: |
else: |
return 0 |
return 0 |