""" TODO: in general, if a directory contains any file other than index.meta -> offer it for archiving """
from types import *
import urllib
import os
import sys
import re
from AccessControl import ClassSecurityInfo
from AccessControl.Role import RoleManager
from Acquisition import Implicit
from Globals import Persistent
from time import strptime
from time import strftime
import time
import os.path
import dircache
import xml.dom.minidom
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
import tempfile
# Files created through the tempfile module (e.g. the archiver output files)
# are placed in this directory instead of the system default.
tempfile.tempdir = "/var/tmp/archiver"

# Directory entry names that browse.filterExcluded() removes from listings.
exclusion = [
    ".HSResource",
    "lost+found",
    "Network Trash Folder",
    "TheFindByContentFolder",
    "TheVolumeSettingsFolder",
]
class fsentry(Implicit, Persistent, RoleManager):
    """File entry class: one path plus its modification time."""

    # Class-level defaults.  __init__ sets path, user, all and mtime on the
    # instance; month, date and time are never assigned anywhere in this class.
    path = ""
    user = ""
    month = ""
    date = ""
    time = ""

    security = ClassSecurityInfo()

    def __init__(self, extpath):
        """Initialize the entry from a path string.

        extpath -- a path, optionally followed by a newline (e.g. one line of
                   readlines() output).  Only the text before the first
                   newline is used.

        Raises whatever os.path.getmtime raises if the path does not exist.
        """
        # The former regex r"(.*)\n" required a trailing newline and failed
        # with AttributeError without one; splitting accepts both forms and
        # yields the same text whenever a newline is present.
        extpath = os.path.abspath(extpath.split("\n", 1)[0])
        self.all = extpath
        self.path = extpath
        self.user = ""
        self.mtime = os.path.getmtime(extpath)

    security.declarePublic('getPath')
    def getPath(self):
        """Return the absolute path of the entry."""
        return self.path

    security.declarePublic('getUser')
    def getUser(self):
        """Return the user (always "" as set by __init__)."""
        return self.user

    security.declarePublic('getDate')
    def getDate(self):
        """Return the modification time formatted as YYYYMMDDHHMM (UTC)."""
        return strftime("%Y%m%d%H%M", time.gmtime(self.mtime))

    # Was declared as 'getDate' a second time, which left getID without a
    # public declaration.
    security.declarePublic('getID')
    def getID(self):
        """Return a unique, sortable ID: the date string followed by the path."""
        return self.getDate() + self.getPath()

    security.declarePublic('getTime')
    def getTime(self):
        """Return the time attribute (the class default "" unless set elsewhere)."""
        return self.time

    security.declarePublic('getAll')
    def getAll(self):
        """Return the 'all' attribute (same absolute path as getPath after __init__)."""
        return self.all
class filesystem(Implicit, Persistent, RoleManager):
    """store filesystem"""

    # node maps "directory/" -> list of entry names found in it,
    # hasindex maps "directory/" -> "1" when it holds "index.meta" or "meta".
    # NOTE: these class-level dicts are only replaced by per-instance dicts
    # when __init__ is called with reload=1; otherwise they are shared.
    # Methods that had no docstring are documented with comments so that
    # Zope's docstring-based publishing rule is not affected.
    node = {}
    hasindex = {}

    security = ClassSecurityInfo()

    def getfs(self, start):
        """Run `find` below start and return its output as a list of lines.

        start is passed in single quotes so that a path containing spaces or
        shell metacharacters reaches find as one argument.
        """
        quoted = "'" + start.replace("'", "'\\''") + "'"
        f = os.popen("find " + quoted + " -name '*' ", "r")
        try:
            return f.readlines()
        finally:
            # the pipe was previously left open
            f.close()

    def loadfs(self, start):
        """Scan the tree below start and fill node and hasindex."""
        for line in self.getfs(start):
            g = re.search(r"(.*/)(.*)\n", line)
            if g is None:
                continue
            path = g.group(1)
            name = g.group(2)
            self.node.setdefault(path, []).append(name)
            if name in ("index.meta", "meta"):
                self.hasindex[path] = "1"

    def __init__(self, start, reload=0):
        # Only reload == 1 gives the instance fresh dicts and scans start;
        # with any other value nothing is loaded.
        if reload == 1:
            self.node = {}
            self.hasindex = {}
            self.loadfs(start)

    security.declarePublic('getNode')
    def getNode(self):
        # Return the directory -> entry-names mapping.
        return self.node

    security.declarePublic('getKeys')
    def getKeys(self):
        # Return the known directory paths.
        return self.node.keys()

    security.declarePublic('clearnode')
    def clearnode(self):
        # Drop all scanned directories (hasindex is left untouched); returns 0.
        self.node = {}
        return 0

    security.declarePublic('hasIndex')
    def hasIndex(self, path):
        # True when path (with trailing "/") holds "index.meta" or "meta".
        return path in self.hasindex

    def onlyIndex_old(self):
        """Return the directories that have an index and further content.

        A directory is included when it is in hasindex and has at least two
        entries, unless it has exactly two entries and one of them is "meta".
        """
        j = {}
        for k in self.node:
            if k not in self.hasindex:
                continue
            names = self.node[k]
            if len(names) < 2:
                continue
            if len(names) == 2 and 'meta' in names:
                continue
            j[k] = names
        return j

    def archive_the_path(self, path):
        """Decide from path/index.meta whether path still has to be archived.

        Returns 0 when index.meta cannot be read, when its content-type is
        "folder", or when archive-storage-date is non-empty.  Returns 1 when
        archive-storage-date is empty, or when the file cannot be parsed or
        lacks one of the two tags.
        """
        try:
            f = open(path + "/index.meta", "r")
            try:
                lines = f.read()
            finally:
                # the file was previously never closed
                f.close()
        except Exception:
            # no readable index.meta: do not archive
            return 0
        try:
            dom = xml.dom.minidom.parseString(lines)
            if getText(dom.getElementsByTagName("content-type")[0].childNodes) == "folder":
                # folders are not archived
                return 0
            archive_storage_date = getText(
                dom.getElementsByTagName("archive-storage-date")[0].childNodes)
        except Exception:
            # unparsable file or missing tag: archive
            return 1
        if archive_storage_date == "":
            return 1
        return 0

    security.declarePublic('onlyIndex')
    def onlyIndex(self):
        """return only files with archive material (archive-storage-date not set)"""
        j = {}
        for k in self.node:
            if self.archive_the_path(k):
                j[k] = self.node[k]
        return j

    security.declarePublic('getImageDirs')
    def getImageDirs(self, dom, path):
        # Collect the <name> text of every <dir> element in dom.  When
        # path + "/" + name contains "/mpiwg/online/", only the part after
        # that marker is returned; otherwise the bare name.
        dirback = []
        for dirnode in dom.getElementsByTagName("dir"):
            name = getText(dirnode.getElementsByTagName("name")[0].childNodes)
            match = re.search(r"(.*)/mpiwg/online/(.*)", path + "/" + name)
            if match is None:
                dirback.append(name)
            else:
                dirback.append(match.group(2))
        return dirback

    security.declarePublic('digilib')
    def digilib(self, path):
        """check if folder is a container for digilib files

        Returns the result of parseIndexMeta(path) when path + "/" is in
        hasindex, otherwise the string "NO".
        """
        if (path + "/") in self.hasindex:
            return self.parseIndexMeta(path)
        return "NO"

    security.declarePublic('isdigilib')
    def isdigilib(self, path):
        """return number of possible image directories useful for digilib"""
        if (path + "/") not in self.hasindex:
            return 0
        dirs = self.parseIndexMeta(path)
        # parseIndexMeta returns None for documents that are not scanned
        # documents; len(None) used to raise TypeError here.
        if not dirs:
            return 0
        return len(dirs)

    security.declarePublic('parseIndexMeta')
    def parseIndexMeta(self, k):
        """parse k/index.meta and return the digilib image directories

        Returns the list from getImageDirs when content-type is
        "scanned-document" or "scanned document", [] when the file cannot be
        read or parsed, and None for any other content-type.
        """
        try:
            # read directly instead of piping through `cat` in a shell
            f = open(k + "/index.meta", "r")
            try:
                lines = f.read()
            finally:
                f.close()
            dom = xml.dom.minidom.parseString(lines)
            content_type = getText(dom.getElementsByTagName("content-type")[0].childNodes)
            if content_type in ("scanned-document", "scanned document"):
                return self.getImageDirs(dom, k)
        except Exception:
            return []
        return None
class filesystem2(Implicit, Persistent, RoleManager):
    """store filesystem"""

    # node maps "directory/" -> list of entry names found in it,
    # hasindex maps "directory/" -> "1" when it holds "index.meta" or "meta".
    # NOTE: __init__ never rebinds these, so loadfs fills the class-level
    # dicts, which are shared by all instances.
    # Methods that had no docstring are documented with comments so that
    # Zope's docstring-based publishing rule is not affected.
    node = {}
    hasindex = {}

    security = ClassSecurityInfo()

    def getfs(self, start):
        """Run `find` below start and return its output as a list of lines.

        start is passed in single quotes so that a path containing spaces or
        shell metacharacters reaches find as one argument.
        """
        quoted = "'" + start.replace("'", "'\\''") + "'"
        f = os.popen("find " + quoted + " -name '*' ", "r")
        try:
            return f.readlines()
        finally:
            # the pipe was previously left open
            f.close()

    def loadfs(self, start):
        """Scan the tree below start and fill node and hasindex."""
        for line in self.getfs(start):
            g = re.search(r"(.*/)(.*)\n", line)
            if g is None:
                continue
            path = g.group(1)
            name = g.group(2)
            self.node.setdefault(path, []).append(name)
            if name in ("index.meta", "meta"):
                self.hasindex[path] = "1"

    def __init__(self, start, reload=0):
        """Do nothing; start and reload are accepted but not used."""

    security.declarePublic('getImageDirs')
    def getImageDirs(self, dom, path):
        # Collect the <name> text of every <dir> element in dom.  When
        # path + "/" + name contains "/mpiwg/online/", only the part after
        # that marker is returned; otherwise the bare name.
        dirback = []
        for dirnode in dom.getElementsByTagName("dir"):
            name = getText(dirnode.getElementsByTagName("name")[0].childNodes)
            match = re.search(r"(.*)/mpiwg/online/(.*)", path + "/" + name)
            if match is None:
                dirback.append(name)
            else:
                dirback.append(match.group(2))
        return dirback

    security.declarePublic('digilib')
    def digilib(self, path):
        """check if folder is a container for digilib files

        Returns the result of parseIndexMeta(path) when path/index.meta
        exists, otherwise the string "NO".
        """
        if os.path.exists(path + "/index.meta"):
            return self.parseIndexMeta(path)
        return "NO"

    security.declarePublic('isdigilib')
    def isdigilib(self, path):
        # Number of image directories listed in path/index.meta, 0 if none.
        if not os.path.exists(path + "/index.meta"):
            return 0
        dirs = self.parseIndexMeta(path)
        # parseIndexMeta returns None for documents that are not scanned
        # documents; len(None) used to raise TypeError here.
        if not dirs:
            return 0
        return len(dirs)

    security.declarePublic('parseIndexMeta')
    def parseIndexMeta(self, k):
        """parse k/index.meta and return the digilib image directories

        Returns the list from getImageDirs when content-type is
        "scanned-document", [] when the file cannot be read or parsed, and
        None for any other content-type.
        """
        # NOTE(review): filesystem.parseIndexMeta also accepts the spelling
        # "scanned document"; confirm whether the difference is intended.
        try:
            # read directly instead of piping through `cat` in a shell
            f = open(k + "/index.meta", "r")
            try:
                lines = f.read()
            finally:
                f.close()
            dom = xml.dom.minidom.parseString(lines)
            content_type = getText(dom.getElementsByTagName("content-type")[0].childNodes)
            if content_type == "scanned-document":
                return self.getImageDirs(dom, k)
        except Exception:
            return []
        return None
class browse(Implicit, Persistent, RoleManager):
    # Lazily filled cache of directory listings plus per-path toggle flags.
    # Documented with comments (no docstrings were present) so that Zope's
    # docstring-based publishing rule is not affected.
    security = ClassSecurityInfo()

    # path -> filtered listing; replaced by a per-instance dict in __init__.
    tree = {}
    # path -> toggle flag.  NOTE: never rebound on the instance, so the
    # flags live on the class and are shared by all browse objects.
    toggledict = {}

    def filterExcluded(self, dir):
        # Return the entries of dir that are not named in `exclusion`.
        return [item for item in dir if item not in exclusion]

    def __init__(self, startpath):
        # Start with a cache that holds only the listing of startpath.
        listing = self.filterExcluded(dircache.listdir(startpath))
        self.tree = {}
        self.tree[startpath] = listing

    security.declarePublic('getTree')
    def getTree(self, path):
        # Return the filtered listing of path, reading it on first use.
        if path not in self.tree:
            self.tree[path] = self.filterExcluded(dircache.listdir(path))
        return self.tree[path]

    security.declarePublic('isDirectory')
    def isDirectory(self, path, file):
        # True when path/file is a directory.
        candidate = os.path.abspath(path + "/" + file)
        return os.path.isdir(candidate)

    security.declarePublic('toggle')
    def toggle(self, tmppath, file):
        # Flip the flag of tmppath/file: an unknown path becomes 4,
        # a flag of 0 becomes 1, any other value becomes 0.
        path = tmppath + "/" + file
        flags = self.toggledict
        if path not in flags:
            flags[path] = 4
        elif flags[path] == 0:
            flags[path] = 1
        else:
            flags[path] = 0

    security.declarePublic('isToggle')
    def isToggle(self, tmppath, file):
        # Return the stored flag of tmppath/file, or 0 when none is stored.
        return self.toggledict.get(tmppath + "/" + file, 0)
def getfs(start):
    """Run `find` below start and return its output as a list of lines.

    Each line is one path followed by a newline.  start is passed in single
    quotes so that a path containing spaces or shell metacharacters reaches
    find as one argument.
    """
    quoted = "'" + start.replace("'", "'\\''") + "'"
    f = os.popen("find " + quoted + " -name '*'", "r")
    try:
        return f.readlines()
    finally:
        # the pipe was previously left open
        f.close()
def showall(start):
    """Write every line returned by getfs(start) to standard output; return 0.

    Each line is followed by one extra newline, so the listing appears
    double-spaced.
    """
    for entry in getfs(start):
        sys.stdout.write(entry + "\n")
    return 0
def entries(start):
    """Return a list of fsentry objects for the existing paths below start."""
    found = []
    for line in getfs(start):
        try:
            target = os.path.abspath(re.search(r"(.*)\n", line).group(1))
            if os.path.exists(target):
                found.append(fsentry(line))
        except:
            # best effort: a line that cannot be parsed or turned into an
            # fsentry is skipped silently
            pass
    return found
def getfilesystem(start, reload=0):
    """Return a filesystem object loaded from start.

    The reload argument is accepted but not used: the object is always
    created with reload=1.
    """
    return filesystem(start, 1)
def getfilesystem2(start, reload=0):
    """Return a new filesystem2 object for start.

    The reload argument is accepted but not used: the constructor is always
    called with 1.
    """
    return filesystem2(start, 1)
def tree(start):
    """Return a browse object rooted at start."""
    return browse(start)
def sort_by_date(fs):
    """Return the file entries of fs ordered by descending getID().

    Entries are looked up by ID, so when several entries share an ID the
    last of them is returned once per occurrence.
    """
    by_id = {}
    ids = []
    for entry in fs:
        key = entry.getID()
        ids.append(key)
        by_id[key] = entry
    ids.sort()
    ids.reverse()
    return [by_id[key] for key in ids]
def path_to_link(path):
    """generates navigation bar for showfiles

    Returns the components of path, outermost first, each followed by "/"
    (e.g. "/a/b/c" -> "a/b/c/").

    The former loop stopped only when the remaining parent was one character
    long and otherwise gave up after 20 levels, then indexed one element past
    the collected list.  That raised IndexError for paths deeper than 20
    levels and for relative paths.  Walking up until os.path.dirname stops
    changing the parent (or yields "") always terminates, so no level limit
    is needed.  Results for absolute paths of up to 20 levels are unchanged;
    a relative path such as "a/b" now yields "a/b/" instead of "b/".
    """
    names = [os.path.basename(path)]
    parent = os.path.dirname(path)
    while parent and os.path.dirname(parent) != parent:
        names.append(os.path.basename(parent))
        parent = os.path.dirname(parent)
    names.reverse()
    return "".join([name + "/" for name in names])
def path_to_link_view(path):
    """generates navigation bar for viewfiles

    Returns the components of path, outermost first, each followed by "/"
    (e.g. "/a/b/c" -> "a/b/c/").

    The former loop stopped only when the remaining parent was one character
    long and otherwise gave up after 20 levels, then indexed one element past
    the collected list.  That raised IndexError for paths deeper than 20
    levels and for relative paths.  Walking up until os.path.dirname stops
    changing the parent (or yields "") always terminates, so no level limit
    is needed.  Results for absolute paths of up to 20 levels are unchanged;
    a relative path such as "a/b" now yields "a/b/" instead of "b/".
    """
    names = [os.path.basename(path)]
    parent = os.path.dirname(path)
    while parent and os.path.dirname(parent) != parent:
        names.append(os.path.basename(parent))
        parent = os.path.dirname(parent)
    names.reverse()
    return "".join([name + "/" for name in names])
def path_to_link_store(path):
    """generates navigation bar for the store view

    Returns the components of path, outermost first, each followed by "/"
    (e.g. "/a/b/c" -> "a/b/c/").

    The former loop stopped only when the remaining parent was one character
    long and otherwise gave up after 20 levels, then indexed one element past
    the collected list.  That raised IndexError for paths deeper than 20
    levels and for relative paths.  Walking up until os.path.dirname stops
    changing the parent (or yields "") always terminates, so no level limit
    is needed.  Results for absolute paths of up to 20 levels are unchanged;
    a relative path such as "a/b" now yields "a/b/" instead of "b/".
    """
    names = [os.path.basename(path)]
    parent = os.path.dirname(path)
    while parent and os.path.dirname(parent) != parent:
        names.append(os.path.basename(parent))
        parent = os.path.dirname(parent)
    names.reverse()
    return "".join([name + "/" for name in names])
class Error(Implicit, Persistent, RoleManager):
    # Holder for one result record.  Documented with comments (no docstrings
    # were present) so that Zope's docstring-based publishing rule is not
    # affected.
    security = ClassSecurityInfo()

    # Class-level default; every instance gets its own copy in __init__.
    error = []

    def __init__(self, initerror):
        # Keep a shallow slice copy of the given sequence.
        self.error = initerror[:]

    security.declarePublic('getError')
    def getError(self):
        # Return the stored record.
        return self.error
class metacheck(Implicit, Persistent, RoleManager):
    # Runs the external metacheck program on one or more paths and keeps one
    # Error record per path.  Documented with comments where no docstring was
    # present so that Zope's docstring-based publishing rule is not affected.
    lines = []
    security = ClassSecurityInfo()

    def _shquote(self, text):
        # Wrap text in single quotes so the shell passes it on as one word.
        return "'" + text.replace("'", "'\\''") + "'"

    def parsearchive(self, str):
        """parse for error

        str -- list of output lines of the external program.

        Returns [text, status]: text is every line followed by a newline,
        status is derived from the LAST line only -- "error" when the text
        before its last colon is 'ABORT', "ok" when it is 'DONE', otherwise
        "running".  Empty input yields ['', 'running'].
        """
        if not str:
            return ['', 'running']
        retstr = ''
        error = "running"
        for line in str:
            # NOTE(review): the separator literal was split across two
            # physical lines in the source; it is written here as an explicit
            # "\n" -- confirm that no markup (e.g. a line-break tag) was lost.
            retstr = retstr + line + "\n"
            check = re.search(r"(.*):(.*)", line)
            if check is None:
                # a line without a colon used to raise AttributeError
                error = "running"
            elif check.group(1) == 'ABORT':
                error = "error"
            elif check.group(1) == 'DONE':
                error = "ok"
            else:
                error = "running"
        return [retstr, error]

    def __init__(self, path):
        """Run the metadata check on path and collect the results.

        path -- a single path string or a sequence of path strings.
        """
        self.lines = []
        # StringTypes also covers unicode, which the former
        # type(path)==StringType test sent into the per-item loop
        # character by character.
        if isinstance(path, StringTypes):
            paths = [path]
        else:
            paths = path
        for singlepath in paths:
            f = os.popen("/Users/dwinter/metacheck " + self._shquote(singlepath), "r")
            try:
                output = f.readlines()
            finally:
                # the pipe was previously left open
                f.close()
            self.lines.append(Error([singlepath, self.parsearchive(output)]))

    security.declarePublic('messages')
    def messages(self):
        # Return the list of Error records built by __init__.
        return self.lines
class archive(Implicit, Persistent, RoleManager):
    # Starts the external archiver in the background for one or more paths
    # and reads its progress from per-path output files.  Documented with
    # comments where no docstring was present so that Zope's docstring-based
    # publishing rule is not affected.
    lines = []
    security = ClassSecurityInfo()

    def _shquote(self, text):
        # Wrap text in single quotes so the shell passes it on as one word.
        return "'" + text.replace("'", "'\\''") + "'"

    def parsearchive(self, str):
        """parse for error

        str -- list of output lines of the external program.

        Returns [text, status]: text is every line followed by a newline,
        status is derived from the LAST line only -- "error" when the text
        before its last colon is 'ABORT', "ok" when it is 'DONE', otherwise
        "running".  Empty input yields ['', 'running'].
        """
        if not str:
            return ['', 'running']
        retstr = ''
        error = "running"
        for line in str:
            # NOTE(review): the separator literal was split across two
            # physical lines in the source; it is written here as an explicit
            # "\n" -- confirm that no markup (e.g. a line-break tag) was lost.
            retstr = retstr + line + "\n"
            check = re.search(r"(.*):(.*)", line)
            if check is None:
                # a line without a colon used to raise AttributeError
                error = "running"
            elif check.group(1) == 'ABORT':
                error = "error"
            elif check.group(1) == 'DONE':
                error = "ok"
            else:
                error = "running"
        return [retstr, error]

    def __init__(self, path, session):
        """archive the documents in path

        path    -- a single path string or a sequence of path strings.
        session -- mapping in which this object is stored under 'archiver'.

        For every path the archiver is started in the background with its
        standard output redirected to a fresh temporary file; the file names
        are kept in self.filenames for messages().
        """
        self.lines = []
        self.filenames = {}
        session['archiver'] = self
        # StringTypes also covers unicode, which the former
        # type(path)==StringType test sent into the per-item loop
        # character by character.
        if isinstance(path, StringTypes):
            paths = [path]
        else:
            paths = path
        for singlepath in paths:
            # mkstemp creates the file atomically (mktemp only invents a
            # name), so messages() can open it even before the archiver has
            # written anything.
            fd, outname = tempfile.mkstemp()
            os.close(fd)
            self.filenames[singlepath] = outname
            f = os.popen("/usr/local/mpiwg/archive/archiver "
                         + self._shquote(singlepath)
                         + " > " + self._shquote(outname) + " &", "r")
            # The command runs in the background with its output redirected,
            # so nothing is read from the pipe; close it right away.
            f.close()

    security.declarePublic('messages')
    def messages(self):
        # Re-read every output file and return one Error record per path.
        self.lines = []
        for path in self.filenames.keys():
            f = open(self.filenames[path], "r")
            try:
                output = f.readlines()
            finally:
                # the file was previously left open
                f.close()
            self.lines.append(Error([path, self.parsearchive(output)]))
        return self.lines
def evalext(str):
    """Evaluate the expression in str with eval() and return the result.

    SECURITY NOTE(review): eval() executes arbitrary Python code.  Make sure
    no caller passes text that originates from a request or any other
    untrusted source.
    """
    result = eval(str)
    return result
def storeerror(ret, path, context, i):
    """Store ret and path in the session under the keys error<i> and path<i>.

    context must provide REQUEST.SESSION as a mapping.  Returns the relative
    URL 'error?number=<i>'.
    """
    store = context.REQUEST.SESSION
    error_key = 'error%i' % i
    path_key = 'path%i' % i
    store[error_key] = ret
    store[path_key] = path
    return 'error?number=%i' % i
def geterror(str, context):
    """Return the session value stored under the key str.

    context must provide REQUEST.SESSION as a mapping; a missing key raises
    whatever that mapping raises for it.
    """
    return context.REQUEST.SESSION[str]
def readfile(path):
    """Return the complete content of the file at path as one string.

    Raises IOError if the file cannot be opened.
    """
    f = open(path, 'r')
    try:
        return f.read()
    finally:
        # the file was previously never closed
        f.close()
def writefile(self, path, txt, REQUEST):
    """Write txt to the file at path, then call the acquired 'archive2'.

    self    -- context object providing aq_acquire.
    REQUEST -- accepted but not used here.

    Returns the result of calling the object acquired as 'archive2'.
    """
    f = open(path, 'w')
    try:
        f.write(txt)
    finally:
        # close the file even when the write fails
        f.close()
    rval = self.aq_acquire('archive2')
    return rval()
def metachecker(self, path):
    """Check the metadata of the documents in path.

    Copies REQUEST['path'] into the session under 'path' and returns a
    metacheck object built for the path argument.
    """
    request = self.REQUEST
    request.SESSION['path'] = request['path']
    return metacheck(path)
def archiver(self, path):
    """Archive the documents in path.

    Creates an archive object for path with the request session, then
    returns the result of redirecting the response to 'archive4'.
    """
    request = self.REQUEST
    archive(path, request.SESSION)
    return request.RESPONSE.redirect('archive4')
def getText(nodelist):
    """Return the concatenated data of all text nodes in nodelist.

    Only the nodes directly in nodelist are considered; text inside child
    elements is not included.
    """
    pieces = [node.data for node in nodelist
              if node.nodeType == node.TEXT_NODE]
    return "".join(pieces)
def getBib(nodelist):
rc= "
"+str(node.nodeName)+": | "+getText(node.childNodes)+" |
created by: "+creator+" at: "+creation_date+"
" html=html+""+description+"
" try: bib = dom.getElementsByTagName("meta")[0].getElementsByTagName("bib")[0] if bib.attributes.has_key('type'): html=html+"