""" TO DO generell falls noch ein File, das nicht index.meta -> archivierung ausgeben """
from types import *
import urllib
import os
import sys
import re
from AccessControl import ClassSecurityInfo
from AccessControl.Role import RoleManager
from Acquisition import Implicit
from Globals import Persistent
from time import strptime
from time import strftime
import time
import os.path
import dircache
import xml.dom.minidom
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
import tempfile
# Directory in which the tempfile module creates the temporary files used by
# this module.
tempfile.tempdir = "/var/tmp/archiver"

# Directory entries that browse.filterExcluded() removes from listings.
exclusion = [
    ".HSResource",
    "lost+found",
    "Network Trash Folder",
    "TheFindByContentFolder",
    "TheVolumeSettingsFolder",
]
class fsentry(Implicit, Persistent, RoleManager):
    """A single file-system entry built from one line of ``find`` output.

    Stores the absolute path and the modification time of the entry and
    exposes them through publicly declared accessor methods.
    """

    # Class-level defaults.  ``month``, ``date`` and ``time`` are never
    # assigned per instance inside this class.
    path = ""
    user = ""
    month = ""
    date = ""
    time = ""

    security = ClassSecurityInfo()

    def __init__(self, extpath):
        """Initialise the entry from ``extpath``.

        ``extpath`` is a path, optionally followed by a newline (as returned
        by ``readlines()``); only the text before the first newline is used.
        Raises ``OSError`` when the modification time cannot be read.
        """
        # Splitting instead of a regex match also accepts paths that do not
        # end in a newline.
        extpath = os.path.abspath(extpath.split("\n", 1)[0])
        self.all = extpath
        self.path = extpath
        self.user = ""
        self.mtime = os.path.getmtime(extpath)

    security.declarePublic('getPath')
    def getPath(self):
        """Return the absolute path of the entry."""
        return self.path

    security.declarePublic('getUser')
    def getUser(self):
        """Return the user field (always the empty string set in __init__)."""
        return self.user

    security.declarePublic('getDate')
    def getDate(self):
        """Return the modification time as ``YYYYmmddHHMM`` in UTC."""
        return strftime("%Y%m%d%H%M", time.gmtime(self.mtime))

    # The original declared 'getDate' a second time here, which left getID
    # without a security declaration.
    security.declarePublic('getID')
    def getID(self):
        """Return a unique, sortable ID: the date string followed by the path."""
        return self.getDate() + self.getPath()

    security.declarePublic('getTime')
    def getTime(self):
        """Return the ``time`` attribute (the class default unless set elsewhere)."""
        return self.time

    security.declarePublic('getAll')
    def getAll(self):
        """Return the ``all`` attribute, which __init__ sets to the absolute path."""
        return self.all
class filesystem(Implicit, Persistent, RoleManager):
    """Snapshot of a directory tree, indexed by directory.

    ``node`` maps a directory path (including its trailing slash) to the
    list of names found directly inside it.  ``hasindex`` has an entry for
    every directory that contains a file named ``index.meta`` or ``meta``.
    """

    # NOTE(review): these dictionaries live on the class.  __init__ replaces
    # them with per-instance dictionaries only when reload == 1; otherwise
    # the instance reads the shared class-level state.
    node = {}
    hasindex = {}

    security = ClassSecurityInfo()

    def getfs(self, start):
        """Return the output lines of ``find <start> -name '*'``.

        NOTE(review): ``start`` is pasted into a shell command unquoted, so
        it must come from a trusted source.
        """
        f = os.popen("find " + start + " -name '*' ", "r")
        try:
            return f.readlines()
        finally:
            f.close()

    def loadfs(self, start):
        """Scan the tree below ``start`` and fill ``node`` and ``hasindex``."""
        for line in self.getfs(start):
            g = re.search(r"(.*/)(.*)\n", line)
            if g is None:
                continue
            path = g.group(1)
            name = g.group(2)
            self.node.setdefault(path, []).append(name)
            if name == "index.meta" or name == "meta":
                self.hasindex[path] = "1"

    def __init__(self, start, reload=0):
        """Load the tree below ``start``, but only when ``reload`` is 1."""
        if reload == 1:
            self.node = {}
            self.hasindex = {}
            self.loadfs(start)

    security.declarePublic('getNode')
    def getNode(self):
        """Return the directory -> names mapping."""
        return self.node

    security.declarePublic('getKeys')
    def getKeys(self):
        """Return the directory paths known to this snapshot."""
        return self.node.keys()

    security.declarePublic('clearnode')
    def clearnode(self):
        """Replace ``node`` by an empty dictionary and return 0."""
        self.node = {}
        return 0

    security.declarePublic('hasIndex')
    def hasIndex(self, path):
        """Tell whether ``path`` was recorded as containing index.meta/meta."""
        return path in self.hasindex

    def onlyIndex_old(self):
        """Return the directories that carry an index file plus other content.

        A directory is kept when it is listed in ``hasindex`` and holds more
        than one name, unless it holds exactly two names and one of them is
        ``meta``.
        """
        j = {}
        for k in self.node:
            if k not in self.hasindex:
                continue
            names = self.node[k]
            if len(names) > 1 and not (len(names) == 2 and 'meta' in names):
                j[k] = names
        return j

    def archive_the_path(self, path):
        """Return 1 when the document in ``path`` still has to be archived.

        Returns 0 when index.meta cannot be read, when its content-type is
        ``folder``, or when ``archive-storage-date`` is non-empty.  Returns 1
        when that date is empty, or when the file cannot be parsed or one of
        the two tags is missing.
        """
        try:
            f = open(path + "/index.meta", "r")
            try:
                lines = f.read()
            finally:
                f.close()
        except Exception:
            # no readable index.meta: do not archive
            return 0
        try:
            dom = xml.dom.minidom.parseString(lines)
            if getText(dom.getElementsByTagName("content-type")[0].childNodes) == "folder":
                # folders are not archived
                return 0
            archive_storage_date = getText(
                dom.getElementsByTagName("archive-storage-date")[0].childNodes)
        except Exception:
            # unparsable file or missing tag: archive
            return 1
        if archive_storage_date == "":
            # empty storage date: archive
            return 1
        return 0

    security.declarePublic('onlyIndex')
    def onlyIndex(self):
        """Return the ``node`` entries for which archive_the_path() is true."""
        j = {}
        for k in self.node:
            if self.archive_the_path(k):
                j[k] = self.node[k]
        return j

    security.declarePublic('getImageDirs')
    def getImageDirs(self, dom, path):
        """Return the image directories named by the ``<dir>`` elements of ``dom``.

        Each directory name is joined to ``path``; when the result contains
        ``/mpiwg/online/`` only the part after it is returned, otherwise the
        bare name from the ``<name>`` element is returned.
        """
        dirback = []
        for dirnode in dom.getElementsByTagName("dir"):
            temp = getText(dirnode.getElementsByTagName("name")[0].childNodes)
            temp2 = re.search(r"(.*)/mpiwg/online/(.*)", path + "/" + temp)
            if temp2 is not None:
                dirback.append(temp2.group(2))
            else:
                dirback.append(temp)
        return dirback

    security.declarePublic('digilib')
    def digilib(self, path):
        """Return the parseIndexMeta() result for ``path``, or "NO" without index."""
        if (path + "/") in self.hasindex:
            return self.parseIndexMeta(path)
        return "NO"

    security.declarePublic('isdigilib')
    def isdigilib(self, path):
        """Return the number of image directories usable by digilib."""
        if (path + "/") in self.hasindex:
            # parseIndexMeta() returns None for documents that are not
            # scanned documents; count that as zero instead of failing.
            return len(self.parseIndexMeta(path) or [])
        return 0

    security.declarePublic('parseIndexMeta')
    def parseIndexMeta(self, k):
        """Parse ``k``/index.meta and return the digilib image directories.

        Returns the list from getImageDirs() for scanned documents, ``None``
        for any other content-type, and ``[]`` when the file cannot be read
        or parsed.
        """
        try:
            # Read the file directly instead of running ``cat`` in a shell.
            f = open(k + "/index.meta", "r")
            try:
                lines = f.read()
            finally:
                f.close()
            dom = xml.dom.minidom.parseString(lines)
            content_type = getText(dom.getElementsByTagName("content-type")[0].childNodes)
            if content_type == "scanned-document" or content_type == "scanned document":
                return self.getImageDirs(dom, k)
        except Exception:
            return []
        return None
class filesystem2(Implicit, Persistent, RoleManager):
    """Variant of the file-system snapshot that checks index.meta on disk.

    Unlike its sibling class, __init__ loads nothing; digilib() and
    isdigilib() test for ``index.meta`` with os.path.exists().
    """

    # NOTE(review): class-level dictionaries, shared by all instances;
    # loadfs() mutates them in place.
    node = {}
    hasindex = {}

    security = ClassSecurityInfo()

    def getfs(self, start):
        """Return the output lines of ``find <start> -name '*'``.

        NOTE(review): ``start`` is pasted into a shell command unquoted, so
        it must come from a trusted source.
        """
        f = os.popen("find " + start + " -name '*' ", "r")
        try:
            return f.readlines()
        finally:
            f.close()

    def loadfs(self, start):
        """Scan the tree below ``start`` and fill ``node`` and ``hasindex``."""
        for line in self.getfs(start):
            g = re.search(r"(.*/)(.*)\n", line)
            if g is None:
                continue
            path = g.group(1)
            name = g.group(2)
            self.node.setdefault(path, []).append(name)
            if name == "index.meta" or name == "meta":
                self.hasindex[path] = "1"

    def __init__(self, start, reload=0):
        """Do nothing; both arguments are accepted and ignored."""
        pass

    security.declarePublic('getImageDirs')
    def getImageDirs(self, dom, path):
        """Return the image directories named by the ``<dir>`` elements of ``dom``.

        Each directory name is joined to ``path``; when the result contains
        ``/mpiwg/online/`` only the part after it is returned, otherwise the
        bare name from the ``<name>`` element is returned.
        """
        dirback = []
        for dirnode in dom.getElementsByTagName("dir"):
            temp = getText(dirnode.getElementsByTagName("name")[0].childNodes)
            temp2 = re.search(r"(.*)/mpiwg/online/(.*)", path + "/" + temp)
            if temp2 is not None:
                dirback.append(temp2.group(2))
            else:
                dirback.append(temp)
        return dirback

    security.declarePublic('digilib')
    def digilib(self, path):
        """Return the parseIndexMeta() result, or "NO" without index.meta."""
        if os.path.exists(path + "/index.meta"):
            return self.parseIndexMeta(path)
        return "NO"

    security.declarePublic('isdigilib')
    def isdigilib(self, path):
        """Return the number of image directories usable by digilib."""
        if os.path.exists(path + "/index.meta"):
            # parseIndexMeta() returns None for documents that are not
            # scanned documents; count that as zero instead of failing.
            return len(self.parseIndexMeta(path) or [])
        return 0

    security.declarePublic('parseIndexMeta')
    def parseIndexMeta(self, k):
        """Parse ``k``/index.meta and return the digilib image directories.

        Returns the list from getImageDirs() for scanned documents, ``None``
        for any other content-type, and ``[]`` when the file cannot be read
        or parsed.
        """
        try:
            # Read the file directly instead of running ``cat`` in a shell.
            f = open(k + "/index.meta", "r")
            try:
                lines = f.read()
            finally:
                f.close()
            dom = xml.dom.minidom.parseString(lines)
            content_type = getText(dom.getElementsByTagName("content-type")[0].childNodes)
            # Both spellings are accepted, matching the other content-type
            # checks in this module.
            if content_type == "scanned-document" or content_type == "scanned document":
                return self.getImageDirs(dom, k)
        except Exception:
            return []
        return None
class browse(Implicit, Persistent, RoleManager):
    """Lazily filled directory tree with per-path toggle states."""

    security = ClassSecurityInfo()

    # ``tree`` is replaced per instance in __init__; ``toggledict`` is only
    # ever mutated in place, so it stays the class-level dictionary.
    tree = {}
    toggledict = {}

    def filterExcluded(self, dir):
        """Return the items of ``dir`` that are not listed in ``exclusion``."""
        return [item for item in dir if item not in exclusion]

    def __init__(self, startpath):
        """Start a fresh tree that contains the listing of ``startpath``."""
        listing = self.filterExcluded(dircache.listdir(startpath))
        self.tree = {}
        self.tree[startpath] = listing

    security.declarePublic('getTree')
    def getTree(self, path):
        """Return the filtered listing of ``path``, reading it on first use."""
        if path not in self.tree:
            self.tree[path] = self.filterExcluded(dircache.listdir(path))
        return self.tree[path]

    security.declarePublic('isDirectory')
    def isDirectory(self, path, file):
        """Tell whether ``path``/``file`` is a directory."""
        candidate = os.path.abspath(path + "/" + file)
        return os.path.isdir(candidate)

    security.declarePublic('toggle')
    def toggle(self, tmppath, file):
        """Flip the toggle state of ``tmppath``/``file``.

        An unknown path is set to 4; a known path goes from 0 to 1 and from
        any other value to 0.
        """
        key = tmppath + "/" + file
        if key not in self.toggledict:
            self.toggledict[key] = 4
        elif self.toggledict[key] == 0:
            self.toggledict[key] = 1
        else:
            self.toggledict[key] = 0

    security.declarePublic('isToggle')
    def isToggle(self, tmppath, file):
        """Return the toggle state of ``tmppath``/``file`` (0 when unknown)."""
        key = tmppath + "/" + file
        if key in self.toggledict:
            return self.toggledict[key]
        return 0
def getfs(start):
    """Return the output lines of ``find <start> -name '*'``.

    Every returned line still carries its trailing newline.

    NOTE(review): ``start`` is pasted into a shell command unquoted, so it
    must come from a trusted source.
    """
    f = os.popen("find " + start + " -name '*'", "r")
    try:
        return f.readlines()
    finally:
        # The original never closed the pipe.
        f.close()
def showall(start):
    """Print every line returned by getfs(start) and return 0."""
    for entry in getfs(start):
        print(entry)
    return 0
def entries(start):
    """Return a list of fsentry objects for the existing paths below ``start``.

    Lines of the ``find`` output that cannot be turned into an entry are
    skipped (best effort).
    """
    fs = []
    for line in getfs(start):
        match = re.search(r"(.*)\n", line)
        if match is None:
            continue
        try:
            if os.path.exists(os.path.abspath(match.group(1))):
                fs.append(fsentry(line))
        except Exception:
            # Deliberately best effort: e.g. the file vanished between the
            # existence check and the mtime lookup in fsentry.
            continue
    return fs
def getfilesystem(start, reload=0):
    """Build and return a filesystem object for ``start``.

    ``reload`` is accepted but not used: the object is always created with
    its reload flag set to 1.
    """
    return filesystem(start, 1)
def getfilesystem2(start, reload=0):
    """Build and return a filesystem2 object for ``start``.

    ``reload`` is accepted but not used: the object is always created with
    1 as its second argument.
    """
    return filesystem2(start, 1)
def tree(start):
    """Return a browse object rooted at ``start``."""
    return browse(start)
def sort_by_date(fs):
    """Return the file entries of ``fs`` sorted by descending getID().

    Every entry appears exactly once in the result, even when several
    entries share the same ID.  (The original looked entries up by ID in a
    dictionary, so the last one of a group was returned repeatedly and the
    others were lost.)
    """
    keyed = []
    for entry in fs:
        # The running position makes each tuple unique, so the entries
        # themselves are never compared.
        keyed.append((entry.getID(), len(keyed), entry))
    keyed.sort()
    keyed.reverse()
    return [item[2] for item in keyed]
def _path_links(path, target):
    """Build the breadcrumb HTML for ``path`` with links to ``target``.

    One ``<a>`` element is emitted for ``path`` and for each of its parent
    directories, outermost first; the root directory itself is not linked.

    NOTE(review): the path is written into the href unquoted and unescaped,
    exactly as before; paths with spaces or HTML special characters will
    produce broken markup.
    """
    crumbs = [[path, os.path.basename(path)]]
    parent = os.path.dirname(path)
    # Stop at the empty string (relative paths) or at a fixed point of
    # dirname() such as "/".  The original stopped on len == 1 with a cap of
    # 20 iterations and then indexed past the end of its list.
    while parent and parent != os.path.dirname(parent):
        crumbs.append([parent, os.path.basename(parent)])
        parent = os.path.dirname(parent)
    crumbs.reverse()
    return "".join(["<a href=" + target + "?path=" + crumb[0] + ">" + crumb[1] + "</a>/"
                    for crumb in crumbs])


def path_to_link(path):
    """Generate the navigation bar for showfiles."""
    return _path_links(path, "showfiles")


def path_to_link_view(path):
    """Generate the navigation bar for viewfiles."""
    return _path_links(path, "viewfiles")


def path_to_link_store(path):
    """Generate the navigation bar for storefiles."""
    return _path_links(path, "storefiles")
class Error(Implicit, Persistent, RoleManager):
    """Holder for one error record, exposed through getError()."""

    error = []

    security = ClassSecurityInfo()

    def __init__(self, initerror):
        """Store a shallow copy (full slice) of ``initerror``."""
        self.error = initerror[:]

    security.declarePublic('getError')
    def getError(self):
        """Return the stored record."""
        return self.error
class metacheck(Implicit, Persistent, RoleManager):
    """Run the external ``metacheck`` tool and keep its parsed output."""

    lines = []

    security = ClassSecurityInfo()

    def parsearchive(self, str):
        """Turn tool output lines into ``[html, status]``.

        ``html`` is all lines joined with ``<br>``.  ``status`` is taken
        from the last line only: "error" when the text before its last
        colon is ``ABORT``, "ok" when it is ``DONE``, otherwise "running".
        Lines without a colon count as "running"; the original raised
        AttributeError on them.  Empty input gives ``['', 'running']``.
        """
        if len(str) == 0:
            return ['', 'running']
        error = "running"
        for line in str:
            check = re.search(r"(.*):(.*)", line)
            if check is None:
                error = "running"
            elif check.group(1) == 'ABORT':
                error = "error"
            elif check.group(1) == 'DONE':
                error = "ok"
            else:
                error = "running"
        retstr = ''.join([line + "<br>" for line in str])
        return [retstr, error]

    def __init__(self, path):
        """Check the metadata of the document(s) in ``path``.

        ``path`` is either one path string or a sequence of path strings.

        NOTE(review): each path is pasted into a shell command unquoted, so
        it must come from a trusted source.
        """
        self.lines = []
        if isinstance(path, StringType):
            paths = [path]
        else:
            paths = path
        for singlepath in paths:
            f = os.popen("/usr/local/mpiwg/archive/metacheck " + singlepath, "r")
            try:
                output = f.readlines()
            finally:
                f.close()
            self.lines.append(Error([singlepath, self.parsearchive(output)]))

    security.declarePublic('messages')
    def messages(self):
        """Return the list of Error records collected in __init__."""
        return self.lines
class archive(Implicit, Persistent, RoleManager):
    """Start the external ``archiver`` tool in the background.

    The output of every run goes to its own temporary file; messages()
    reads and parses those files on demand.
    """

    lines = []

    security = ClassSecurityInfo()

    def parsearchive(self, str):
        """Turn tool output lines into ``[html, status]``.

        ``html`` is all lines joined with ``<br>``.  ``status`` is taken
        from the last line only: "error" when the text before its last
        colon is ``ABORT``, "ok" when it is ``DONE``, otherwise "running".
        Lines without a colon count as "running"; the original raised
        AttributeError on them.  Empty input gives ``['', 'running']``.
        """
        if len(str) == 0:
            return ['', 'running']
        error = "running"
        for line in str:
            check = re.search(r"(.*):(.*)", line)
            if check is None:
                error = "running"
            elif check.group(1) == 'ABORT':
                error = "error"
            elif check.group(1) == 'DONE':
                error = "ok"
            else:
                error = "running"
        retstr = ''.join([line + "<br>" for line in str])
        return [retstr, error]

    def __init__(self, path, session):
        """Archive the document(s) in ``path``.

        ``path`` is either one path string or a sequence of path strings.
        The new object stores itself in ``session['archiver']``.

        NOTE(review): each path is pasted into a shell command unquoted, so
        it must come from a trusted source.
        """
        self.lines = []
        self.filenames = {}
        session['archiver'] = self
        if isinstance(path, StringType):
            paths = [path]
        else:
            paths = path
        for singlepath in paths:
            # mkstemp creates the file atomically, so messages() can open it
            # even before the background shell has started writing.
            handle, outname = tempfile.mkstemp()
            os.close(handle)
            self.filenames[singlepath] = outname
            os.popen("/usr/local/mpiwg/archive/archiver " + singlepath + " > " + outname + " &", "r")

    security.declarePublic('messages')
    def messages(self):
        """Re-read all output files and return one Error record per path."""
        self.lines = []
        for path in self.filenames.keys():
            f = open(self.filenames[path], "r")
            try:
                output = f.readlines()
            finally:
                f.close()
            self.lines.append(Error([path, self.parsearchive(output)]))
        return self.lines
def evalext(str):
    """Evaluate ``str`` as a Python expression and return the result.

    WARNING(review): this hands its argument straight to eval(); it must
    never be called with text that comes from an untrusted source.
    """
    result = eval(str)
    return result
def storeerror(ret, path, context, i):
    """Store ``ret`` and ``path`` in the session under keys numbered ``i``.

    Returns the relative URL of the error page for that number.
    """
    error_key = 'error%i' % i
    path_key = 'path%i' % i
    session = context.REQUEST.SESSION
    session[error_key] = ret
    session[path_key] = path
    return 'error?number=%i' % i
def geterror(str, context):
    """Return the session value stored under the key ``str``."""
    return context.REQUEST.SESSION[str]
def readfile(path):
    """Return the complete contents of the file at ``path`` as one string."""
    f = open(path, 'r')
    try:
        # One read replaces the line-by-line concatenation.
        return f.read()
    finally:
        # The original never closed the file.
        f.close()
def writefile(self, path, txt, REQUEST):
    """Write ``txt`` to ``path``, then call and return the acquired 'archive2'.

    ``REQUEST`` is accepted but not used.
    """
    f = open(path, 'w')
    try:
        f.write(txt)
    finally:
        # Close the file even when the write fails.
        f.close()
    rval = self.aq_acquire('archive2')
    return rval()
def metachecker(self, path):
    """Check the metadata of the documents in ``path``.

    Copies the request's 'path' value into the session first and returns
    the new metacheck object.
    """
    request = self.REQUEST
    request.SESSION['path'] = request['path']
    return metacheck(path)
def archiver(self, path):
    """Start archiving the documents in ``path`` and redirect to 'archive4'."""
    # The archive object registers itself in the session it is given.
    archive(path, self.REQUEST.SESSION)
    return self.REQUEST.RESPONSE.redirect('archive4')
def getText(nodelist):
    """Return the concatenated data of all text nodes in ``nodelist``.

    Only the nodes of ``nodelist`` itself are looked at; child elements are
    not descended into.
    """
    pieces = [node.data for node in nodelist
              if node.nodeType == node.TEXT_NODE]
    return "".join(pieces)
def getBib(nodelist):
    """Render the element nodes of ``nodelist`` as a two-column HTML table.

    Each element becomes one row: its node name, then its text content.

    NOTE(review): the text is inserted into the HTML unescaped, as before.
    """
    # The leftover debug output (print "HI") has been removed.
    rc = "<table border='0'>"
    for node in nodelist:
        if node.nodeType == node.ELEMENT_NODE:
            rc = rc + "<tr><td valign='right'>" + str(node.nodeName) + ":</td><td> " + getText(node.childNodes) + "</td></tr>"
    return rc + "</table>"
def getMetafile(path):
    """Return an HTML summary of ``path``/index.meta.

    Returns the string "NO_METADATA" when the file does not exist.  Raises
    when the file cannot be read or parsed, or when one of the ``name``,
    ``creator``, ``archive-creation-date`` or ``description`` elements is
    missing.  A missing ``content-type`` or ``bib`` section is tolerated.

    NOTE(review): element text is inserted into the HTML unescaped, as
    before.
    """
    if not os.path.exists(path + "/index.meta"):
        return "NO_METADATA"

    # Read the file directly instead of running ``cat`` in a shell.
    f = open(path + "/index.meta", "r")
    try:
        lines = f.read()
    finally:
        f.close()

    dom = xml.dom.minidom.parseString(lines)
    name = getText(dom.getElementsByTagName("name")[0].childNodes)
    creator = getText(dom.getElementsByTagName("creator")[0].childNodes)
    creation_date = getText(dom.getElementsByTagName("archive-creation-date")[0].childNodes)
    description = getText(dom.getElementsByTagName("description")[0].childNodes)
    try:
        content_type = getText(dom.getElementsByTagName("content-type")[0].childNodes)
    except Exception:
        content_type = ""

    # Everything that is not a folder is titled as a document.
    if content_type == "folder":
        html = "<h3>Folder: " + name + "</h3>"
    else:
        html = "<h3>Document: " + name + "</h3>"
    html = html + "<p><i>created by: " + creator + " at: " + creation_date + "</i></p>"
    html = html + "<h4>Description</h4><p>" + description + "</p>"

    try:
        bib = dom.getElementsByTagName("meta")[0].getElementsByTagName("bib")[0]
        if bib.hasAttribute('type'):
            html = html + "<h4>Info (" + bib.getAttribute('type') + ")</h4>"
        else:
            html = html + "<h4>Info</h4>"
        html = html + getBib(bib.childNodes)
    except Exception:
        # No (usable) bibliographic section: return what was built so far.
        pass
    return html
def hasMetafile(path):
    """Tell whether ``path`` contains a file named index.meta."""
    metafile = path + "/index.meta"
    return os.path.exists(metafile)
def isdigilib2(path):
    """Check whether ``path`` is a candidate image folder for digilib.

    Returns 1 when the folder contains a file with an image suffix, or
    when the index.meta of the parent folder has a ``<dir>`` entry of
    content-type ``images`` whose name equals this folder's name.  Returns
    0 otherwise, including when the folder cannot be listed.
    """
    imagesuffixes = ['.gif', '.jpg', '.jpeg', '.png', '.tiff', '.tif']
    try:
        names = os.listdir(path)
    except Exception:
        return 0
    for name in names:
        # Suffixes are compared case-insensitively; the original list knew
        # upper-case variants for only some of the formats.
        if os.path.splitext(name)[1].lower() in imagesuffixes:
            return 1
    try:
        parent, folder = os.path.split(path)
        dom = xml.dom.minidom.parse(parent + "/index.meta")
    except Exception:
        return 0
    for node in dom.getElementsByTagName("dir"):
        try:
            if getText(node.getElementsByTagName("content-type")[0].childNodes) == "images":
                if getText(node.getElementsByTagName("name")[0].childNodes) == folder:
                    return 1
        except Exception:
            # An incomplete <dir> entry must not hide later entries.
            continue
    return 0
def isFullText(path, folder_name):
    """Check whether ``folder_name`` is registered as full text in ``path``.

    Returns 1 when ``path``/index.meta has a ``<dir>`` entry of content-type
    ``fulltext`` whose name equals ``folder_name``, otherwise 0 (also when
    the file is missing or cannot be parsed).
    """
    try:
        dom = xml.dom.minidom.parse(path + "/index.meta")
    except Exception:
        return 0
    for node in dom.getElementsByTagName("dir"):
        try:
            if getText(node.getElementsByTagName("content-type")[0].childNodes) == "fulltext":
                if getText(node.getElementsByTagName("name")[0].childNodes) == folder_name:
                    return 1
        except Exception:
            # An incomplete <dir> entry must not hide later entries (the
            # presentation check in this module skips them the same way).
            continue
    return 0
def isPresentation(path, folder_name):
    """Check whether ``folder_name`` is registered as presentation in ``path``.

    Returns 1 when ``path``/index.meta has a ``<dir>`` entry of content-type
    ``presentation`` whose name equals ``folder_name``, otherwise 0.
    Incomplete ``<dir>`` entries are skipped.
    """
    try:
        document = xml.dom.minidom.parse(path + "/index.meta")
        for dirnode in document.getElementsByTagName("dir"):
            try:
                kind = getText(dirnode.getElementsByTagName('content-type')[0].childNodes)
                if kind == 'presentation':
                    label = getText(dirnode.getElementsByTagName("name")[0].childNodes)
                    if label == folder_name:
                        return 1
            except:
                # skip this entry
                pass
        return 0
    except:
        return 0
def changeName(path):
    """Map a server path to the name used for online display.

    Returns the part after ``/mpiwg/online/`` when present; otherwise
    ``digifiles/`` plus the part after ``/mpiwg/production/docuserver/``;
    otherwise "NO".
    """
    try:
        online = re.search(r"(.*)/mpiwg/online/(.*)", path)
        if online is not None:
            return online.group(2)
        production = re.search(r"(.*)/mpiwg/production/docuserver/(.*)", path)
        return "digifiles/" + production.group(2)
    except:
        # Hack: there is no online display in the archive area, so "NO" is
        # returned instead.
        return "NO"
def test(self):
    """Scratch method: add a DTML method 'neu' to ``self`` and edit it.

    Sets ``self.i`` to 1 and returns "ok".
    """
    self.i=1
    #newtemplate=PageTemplateFile('/usr/local/mpiwg/Zope/Extensions/test').__of__(self)
    self.manage_addProduct['OFSP'].manage_addDTMLMethod('neu','neu')
    # NOTE(review): ``getattr`` is looked up as an attribute of ``self``
    # here; presumably the builtin getattr(self, 'neu') was intended -
    # confirm before relying on this.
    self.getattr('neu').manage_edit('HELLO','neu')
    return "ok"
class ls(Implicit, Persistent, RoleManager):
    """Run ``ls -R /`` in the background and expose its output file."""

    # Class-level defaults that are not used by the methods of this class.
    path = ""
    user = ""
    month = ""
    date = ""
    time = ""

    security = ClassSecurityInfo()

    def __init__(self, start):
        """Start the listing; ``start`` is a mapping (the session).

        The new object stores itself in ``start['outfile']``.
        """
        # mkstemp creates the output file atomically instead of only
        # returning a name, as the race-prone mktemp did.
        handle, self.outfile = tempfile.mkstemp()
        os.close(handle)
        start['outfile'] = self
        os.popen("ls -R / >" + self.outfile + " &", "r")

    def _contents(self):
        """Return what has been written to the output file so far."""
        f = open(self.outfile, "r")
        try:
            return f.read()
        finally:
            f.close()

    security.declarePublic('read')
    def read(self):
        """Return the current contents of the output file.

        The original read from ``self.f``, which was never assigned.
        """
        return self._contents()

    security.declarePublic('retself')
    def retself(self):
        """Return this object."""
        return self

    security.declarePublic('all')
    def all(self):
        """Return the current contents of the output file as one string.

        The original iterated over ``self.f``, which was never assigned.
        """
        return self._contents()

    security.declarePublic('printOutfile')
    def printOutfile(self):
        """Wait until the output file exists, then return its lines."""
        import time as _time  # local alias: the class attribute is also called ``time``
        while not os.path.exists(self.outfile):
            # Sleep briefly instead of spinning at full CPU.
            _time.sleep(0.1)
        f = open(self.outfile)
        try:
            return f.readlines()
        finally:
            f.close()
class overview(Implicit, Persistent, RoleManager):
    """Overview of the log files found in one directory.

    ``resources`` maps the resource line of each log file (or "error" when
    the file has none) to the path of that file.
    """

    dir = []
    resources = {}

    security = ClassSecurityInfo()

    def __init__(self, path):
        """Index all regular files directly inside ``path``."""
        # Use a fresh dictionary per instance.  The original filled the
        # class-level dictionary, so entries accumulated across instances.
        self.resources = {}
        for name in os.listdir(path):
            fullname = path + "/" + name
            if not os.path.isfile(fullname):
                # Sub-directories cannot be opened as log files.
                continue
            self.resources[self.getResource(path, name)] = fullname

    def getResource(self, path, filename):
        """Return the first "INFO" line whose characters 6-13 are "resource".

        The line is returned unchanged (including its newline); "error" is
        returned when the file has no such line.
        """
        f = open(path + "/" + filename, 'r')
        try:
            for line in f.readlines():
                if line[0:4] == "INFO" and line[6:14] == "resource":
                    return line
        finally:
            f.close()
        return "error"

    def parsearchive(self, str):
        """Turn tool output lines into ``[html, status]``.

        ``html`` is all lines joined with ``<br>``.  ``status`` is taken
        from the last line only: "error" when the text before its last
        colon is ``ABORT``, "ok" when it is ``DONE``, otherwise "running".
        Lines without a colon count as "running"; the original raised
        AttributeError on them.  Empty input gives ``['', 'running']``.
        """
        if len(str) == 0:
            return ['', 'running']
        error = "running"
        for line in str:
            check = re.search(r"(.*):(.*)", line)
            if check is None:
                error = "running"
            elif check.group(1) == 'ABORT':
                error = "error"
            elif check.group(1) == 'DONE':
                error = "ok"
            else:
                error = "running"
        retstr = ''.join([line + "<br>" for line in str])
        return [retstr, error]

    security.declarePublic('messages')
    def messages(self):
        """Re-read every indexed file and return one Error record per resource."""
        self.lines = []
        for name in self.resources.keys():
            path = self.resources[name]
            f = open(path, "r")
            try:
                output = f.readlines()
            finally:
                f.close()
            self.lines.append(Error([name, self.parsearchive(output)]))
        return self.lines

    security.declarePublic('printResource')
    def printResource(self):
        """Return the resource -> file path mapping."""
        return self.resources
def getoverview(path):
    """Return an overview object for the directory ``path``."""
    result = overview(path)
    return result
def ls_test(self):
    """Create an ls object for the session and redirect to 'next'."""
    # The ls object registers itself in the session it is given.
    ls(self.REQUEST.SESSION)
    return self.REQUEST.RESPONSE.redirect('next')
def storeFile(self, something):
    """Store ``something`` in the session under the key 'something'; return 1."""
    session = self.REQUEST.SESSION
    session['something'] = something
    return 1
def getFile(self):
    """Return the session value stored under the key 'something'."""
    session = self.REQUEST.SESSION
    return session['something']
def isFolder(self, path):
    """Return true when ``path`` is a folder that may hold further folders or documents.

    For the time being everything that is not a scanned document counts as
    a folder.
    """
    scanned = isScannedDocument(self, path)
    return not scanned
def isScannedDocument(self, path):
    """Return 1 when ``path`` is the root folder of a scanned document.

    That is the case when ``path``/index.meta has the content-type
    ``scanned-document`` or ``scanned document``.  Returns 0 otherwise,
    also when the file is missing or cannot be parsed.  ``self`` is not
    used.
    """
    try:
        f = open(path + "/index.meta", "r")
        try:
            lines = f.read()
        finally:
            # The original never closed the file.
            f.close()
        dom = xml.dom.minidom.parseString(lines)
        content_type = getText(dom.getElementsByTagName("content-type")[0].childNodes)
    except Exception:
        return 0
    if content_type == "scanned-document" or content_type == "scanned document":
        return 1
    return 0
from time import localtime,strftime
def date(self):
    """Return the current local date formatted as ``dd.mm.YYYY``."""
    now = localtime()
    return strftime("%d.%m.%Y", now)
def EditIndex(self, path):
    """Put index.meta of ``path`` into the session and render the edit form.

    The session receives the XML text under 'indexmeta' (the empty string
    when the file cannot be parsed) and ``path`` under 'path'.
    """
    try:
        indexmeta = xml.dom.minidom.parse(path + "/index.meta").toxml()
    except:
        indexmeta = ""
    session = self.REQUEST.SESSION
    session['indexmeta'] = indexmeta
    session['path'] = path
    template = PageTemplateFile('/usr/local/mpiwg/Zope/Extensions/editindex')
    return template.__of__(self)()
def EditIndex2(self):
    """Write the submitted index.meta text to disk and redirect to storefiles.

    The target folder is taken from the session key 'path'.  When the
    request carries a 'fileupload' field, its file name is remembered in
    ``self.file_name``.
    """
    request = self.REQUEST
    if request.has_key('fileupload'):
        self.file_name = request['fileupload'].filename
    # HACK DW: the text is always taken from the 'indexmeta' form field,
    # even when a file was uploaded.
    newtext = request['indexmeta']
    indexmeta = open(request.SESSION['path'] + "/index.meta", "w")
    try:
        indexmeta.writelines(newtext)
    finally:
        # Closing flushes the data to disk before the redirect is issued;
        # the original left the file open.
        indexmeta.close()
    return request.response.redirect("storage/storefiles?path=" + request.SESSION['path'])
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>