1: #Neue Version Begin 5.4.2004
2:
3:
4: """Methoden zum hinzufügen von Dokumenten ins Archiv"""
5: from OSAS_helpers import readArchimedesXML
6: try:
7: import archive
8: except:
9: print "archive not imported"
10:
11: import os
12: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
13: from Products.PageTemplates.PageTemplate import PageTemplate
14: import string
15: import urllib
16: import xml.dom.minidom
17: from time import localtime,strftime
18: from Globals import package_home
19:
20: import re
def showHelp(helptext):
    """Return a minimal HTML page whose body is *helptext*.

    *helptext* is inserted verbatim (it is not HTML-escaped), so it may
    itself contain markup.
    """
    # The template needs a real ``%s`` conversion: a bare ``%`` followed by a
    # newline makes the ``%`` operator raise ValueError.  The one-element
    # tuple keeps tuple arguments from being unpacked by ``%``.
    return """<html>
<body>
%s
</body>
</html>""" % (helptext,)
def add(self, no_upload=0):
    """Add metadata, or metadata and documents, to the repository.

    Stores the result of ``self.ZopeFind(self)`` in ``self.referencetypes``,
    copies the request's ``path`` into the session and renders the
    ``OSAS_add_new`` page template.

    no_upload -- when 0, the session flag ``no_upload`` is set to ``'yes'``
                 (no document upload); any other value removes the flag
                 from the session (documents will be uploaded).
    """
    request = self.REQUEST
    session = request.SESSION

    # A stricter lookup restricted to the meta type 'OSAS_MetadataMapping'
    # was disabled by the original author; every object found is used.
    self.referencetypes = self.ZopeFind(self)

    page = PageTemplateFile('Products/OSA_system/zpt/OSAS_add_new').__of__(self)
    session['path'] = request['path']

    if no_upload != 0:
        if session.has_key('no_upload'):
            del session['no_upload']
    else:
        session['no_upload'] = 'yes'

    return page()
44:
45:
46:
def getISO():
    """Read the tab-separated table ``iso639-1.inc`` from the package home.

    Every line is split on tabs; the first column becomes the value and the
    second column the key of the returned dictionary.

    Returns an empty dictionary when the file cannot be read or any line
    cannot be processed (the partial result is discarded in that case).
    """
    ret = {}
    try:
        table = open(os.path.join(package_home(globals()), 'iso639-1.inc'), 'r')
        try:
            for lineraw in table.readlines():
                # Fails for non-ASCII byte strings; that is caught below.
                line = lineraw.encode('ascii', 'replace')
                columns = line.split('\t')
                # NOTE(review): lines are not stripped, so if the second
                # column is the last one its key keeps the trailing newline
                # -- confirm against the data file.
                ret[columns[1]] = columns[0]
        finally:
            # The original never closed the file handle.
            table.close()
    except Exception:
        # Deliberate best effort: callers get an empty table on any failure.
        ret = {}
    return ret
61:
62:
def add2(self):
    """Remember the chosen reference type and render the bibdata form.

    Reads ``Reference Type`` from the request, stores it on ``self`` and in
    the session, and builds ``self.bibdata`` from ``self.referencetypes``:
    for every entry the second item's ``title`` maps to its ``fields``, and
    the key ``'data'`` holds the second item of the last entry.
    ``self.fields`` is the field list of the chosen reference type
    (KeyError if that type is unknown).  ``self.isolist`` / ``self.isokeys``
    receive the table from ``getISO()`` and its sorted keys.

    Returns the rendered ``OSAS_add_bibdata`` page template.
    """
    self.reftype = self.REQUEST['Reference Type']
    self.REQUEST.SESSION['reftype'] = self.reftype

    self.bibdata = {}
    for referenceType in self.referencetypes:
        mapping = referenceType[1]
        self.bibdata[mapping.title] = mapping.fields
        self.bibdata['data'] = mapping
    self.fields = self.bibdata[self.reftype]

    # Load the table once; the original called getISO() a second time just
    # to obtain the keys, reading the file from disk twice.
    self.isolist = getISO()
    tmp = list(self.isolist.keys())
    tmp.sort()
    self.isokeys = tmp

    newtemplate = PageTemplateFile('Products/OSA_system/zpt/OSAS_add_bibdata').__of__(self)
    return newtemplate()
84:
85:
def parse_query_string(str):
    """Parse a raw URL query string into a dictionary.

    The string is split on ``&`` into ``key=value`` pairs, and each pair is
    split on its first ``=``.  In keys a ``+`` becomes ``-``; in values a
    ``+`` becomes a space.  Percent escapes are decoded afterwards, so an
    encoded ``&``, ``=`` or ``+`` inside a key or value no longer disturbs
    the parsing.

    str -- the raw (still percent-encoded) query string.  The parameter
           name shadows the builtin but is kept for existing callers.

    Returns a dict mapping decoded keys to decoded values.  Empty segments
    (for example from a trailing ``&``) are skipped, a segment without ``=``
    yields an empty value, a repeated key keeps its last value, and an empty
    query string yields an empty dict.
    """
    # Local import with fallback so the function works with both module
    # layouts of the standard library.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote

    queries = {}
    if not str:
        return queries

    for pair in str.split("&"):
        if not pair:
            continue
        parts = pair.split("=", 1)
        raw_key = parts[0]
        if len(parts) == 2:
            raw_value = parts[1]
        else:
            raw_value = ""
        # Translate '+' before unquoting so that an escaped plus sign
        # (%2B) survives as a literal '+'.
        key = unquote(raw_key.replace("+", "-"))
        value = unquote(raw_value.replace("+", " "))
        queries[key] = value
    return queries
110:
def add3(self):
    """Store the submitted metadata and propose a folder name.

    The query string is parsed with ``parse_query_string`` and kept in the
    session under ``metadata``.  The proposal joins, with underscores, the
    first five characters of ``author`` and of ``title`` and the complete
    ``year`` (each only if present); it is stored in
    ``self.vorschlag_naming`` with non-ASCII bytes dropped.

    If the session carries the ``no_upload`` flag, the session's ``path`` is
    used as ``folder_name`` and ``add5`` is called; otherwise the
    ``OSAS_add_naming`` page template is rendered.
    """
    request = self.REQUEST
    session = request.SESSION

    metadata = parse_query_string(request['QUERY_STRING'])
    session['metadata'] = metadata

    pieces = []
    for field in ('author', 'title'):
        if field in metadata:
            pieces.append(metadata[field][:5])
    if 'year' in metadata:
        pieces.append(metadata['year'])

    self.vorschlag_naming = unicode("_".join(pieces), 'ascii', 'ignore')

    if not session.has_key('no_upload'):
        return PageTemplateFile('Products/OSA_system/zpt/OSAS_add_naming').__of__(self)()

    session['folder_name'] = session['path']
    return add5(self)
134:
135:
def add4(self):
    """Extend the session path by the chosen folder and show the upload page.

    ``self.path`` becomes the part of the session's ``path`` that follows
    ``/mpiwg``; the request's ``folder_name`` is appended to the session
    path and stored on ``self`` and in the session.  The image folder name
    is fixed to ``pageimg``.  Returns the rendered ``OSAS_upload`` template.
    """
    request = self.REQUEST
    session = request.SESSION
    current_path = session['path']

    self.path = re.search(r"/mpiwg(.*)", current_path).group(1)
    self.folder_name = request['folder_name']

    # Known problem noted by the original author: the session path is
    # extended on every call, which causes an error when the browser's back
    # button is used.
    session['path'] = current_path + "/" + request['folder_name']
    session['folder_name'] = self.folder_name

    self.image_folder_name = "pageimg"
    return PageTemplateFile('Products/OSA_system/zpt/OSAS_upload').__of__(self)()
148:
def add5(self):
    """Render the ``OSAS_add_metadata`` page (step that adds index.meta)."""
    page = PageTemplateFile('Products/OSA_system/zpt/OSAS_add_metadata')
    return page.__of__(self)()
153:
def _chmod_recursive(top, mode):
    """Best-effort ``chmod -R``: apply *mode* to *top* and everything below."""
    targets = [top]
    for dirpath, dirnames, filenames in os.walk(top):
        for entry in dirnames + filenames:
            candidate = os.path.join(dirpath, entry)
            # Like ``chmod -R``, leave symbolic links found while walking
            # (and whatever they point to) alone.
            if not os.path.islink(candidate):
                targets.append(candidate)
    for target in targets:
        try:
            os.chmod(target, mode)
        except OSError:
            # The shell command this replaces also ignored failures.
            pass


def add6(self):
    """Write the ``index.meta`` file of the resource and show the result.

    The request's query string supplies the additional metadata; the
    session's ``path`` is split into ``archive-path`` and ``folder_name``
    and the content type is fixed to ``scanned document``.  The
    bibliographic metadata stored in the session is rendered into
    ``self.metadataprint`` as one ``<tag>value</tag>`` line per entry, the
    ``index_meta`` page template is rendered, and the result is written to
    ``index.meta`` inside the resource folder.  The file gets mode 0664 and
    the resource folder is recursively set to 0775.

    Returns a redirect to ``URL2?path=...`` when the session carries the
    ``no_upload`` flag, otherwise the rendered ``OSAS_saved`` page.
    """
    from xml.sax.saxutils import escape

    request = self.REQUEST
    session = request.SESSION

    metadata = parse_query_string(request['QUERY_STRING'])
    archive_path, folder_name = os.path.split(session['path'])
    metadata['archive-path'] = archive_path
    metadata['folder_name'] = folder_name
    metadata['content-type'] = "scanned document"

    self.reftype = session['reftype']
    session['add_metadata'] = metadata
    self.add_metadata = metadata
    self.metadata = session['metadata']

    # Escape the values: an unescaped '&' or '<' would make index.meta
    # unparsable for the minidom-based functions of this module.
    fragments = []
    for tag in self.metadata.keys():
        fragments.append("<" + tag + ">" + escape(self.metadata[tag]) + "</" + tag + ">\n")
    self.metadataprint = "".join(fragments)

    newtemplate = PageTemplateFile('Products/OSA_system/zpt/index_meta').__of__(self)
    newtemplate.content_type = "text/plain"
    renderxml = newtemplate()

    resource_dir = archive_path + "/" + folder_name
    if session.has_key('no_upload'):
        metapath = session['path'] + "/index.meta"
    else:
        metapath = resource_dir + "/index.meta"

    f = open(metapath, 'w')
    try:
        f.write(renderxml)
    finally:
        f.close()
    os.chmod(metapath, 0o664)

    # Done in Python instead of a shell "chmod -R" so that the
    # request-derived path never reaches a shell command line.
    _chmod_recursive(resource_dir, 0o775)

    if session.has_key('no_upload'):
        return request.response.redirect(request['URL2'] + "?path=" + session['path'])

    self.viewpath = re.search(r"/mpiwg/online/(.*)", archive_path).group(1)
    newtemplate2 = PageTemplateFile('Products/OSA_system/zpt/OSAS_saved').__of__(self)
    newtemplate2.content_type = "text/html"
    request.response.setHeader('Content-Type', 'text/html')
    return newtemplate2()
196:
197:
198:
199:
def date(self):
    """Return the current local date formatted as ``DD.MM.YYYY``."""
    now = localtime()
    day_month_year = "%d.%m.%Y"
    return strftime(day_month_year, now)
202:
203:
def _first_tag_text(dom, tag_names):
    """Return the text of the first element found for *tag_names*, in order.

    Every name but the last is tried leniently; a failure on the last name
    propagates to the caller.
    """
    for tag_name in tag_names[:-1]:
        try:
            return archive.getText(dom.getElementsByTagName(tag_name)[0].childNodes)
        except Exception:
            pass  # try the next spelling
    return archive.getText(dom.getElementsByTagName(tag_names[-1])[0].childNodes)


def addPresentation(self, path):
    """Prepare adding a presentation to the resource at *path*.

    Reads author (``author``, ``Author`` or ``Editor``), ``title`` and date
    (``year``, ``Year`` or ``date``) from ``path/index.meta`` and stores in
    the session: the first unused folder name of the form
    ``NN-presentation``, the path, and a suggested ``<info>`` XML document
    built from those values.

    Returns the rendered ``addPresentation`` page template.  Raises if
    ``index.meta`` cannot be parsed or none of the alternative tags exists.
    """
    dom = xml.dom.minidom.parse(path + "/index.meta")

    author = _first_tag_text(dom, ('author', 'Author', 'Editor'))
    title = archive.getText(dom.getElementsByTagName('title')[0].childNodes)
    year = _first_tag_text(dom, ('year', 'Year', 'date'))

    # Probe inside the resource folder.  The original tested the bare name,
    # i.e. relative to the process working directory, so presentation
    # folders already present in *path* were not detected.
    i = 1
    while os.path.exists(os.path.join(path, "%02d-presentation" % i)):
        i += 1

    session = self.REQUEST.SESSION
    session['presentationname'] = "%02d-presentation" % i
    session['path'] = path
    session['xmlvorschlag'] = """<info>
<author>%s</author>
<title>%s</title>
<date>%s</date>
<display>yes</display>
</info>""" % (author, title, year)

    newtemplate = PageTemplateFile('Products/OSA_system/zpt/addPresentation').__of__(self)
    return newtemplate()
240:
def addPresentation2(self):
    """Store the presentation info XML and register its folder.

    Writes the request's ``xmltext`` to ``info.xml`` inside
    ``<session path>/<folder_name>`` (creating the folder if necessary),
    sets the folder to mode 0755 and the file to 0644, and adds a ``dir``
    entry with content type ``presentation`` to ``index.meta``.

    Returns a redirect to ``URL2?path=<session path>``.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']

    # HACK kept from the original: evaluating the multipart upload only
    # worked with Mozilla, so an uploaded file is ignored and the text field
    # is always stored as info.xml.  The original still read the upload
    # before discarding it, which could fail when the field held no file
    # object; that dead read is gone.
    file_name = "info.xml"
    xmlinfo = request['xmltext']

    target_dir = path + "/" + folder_name
    target_file = target_dir + "/" + file_name

    try:
        os.mkdir(target_dir)
    except OSError:
        pass  # best effort, e.g. the folder already exists

    f = open(target_file, "w")
    try:
        f.write(xmlinfo)
    finally:
        f.close()

    try:
        os.chmod(target_dir, 0o755)
    except OSError:
        pass  # best effort, as in the original

    os.chmod(target_file, 0o644)
    addDirsToIndexMeta(path, folder_name, content_description, 'presentation')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
276:
def addText(self, path):
    """Prepare adding a full text to the resource at *path*.

    Stores *path* and the list of existing names in the session and returns
    the rendered ``addText`` page template.
    """
    session = self.REQUEST.SESSION
    # To be done: generate the list of existing text files; for now the
    # list is fixed.
    session['existing_names'] = ['pageimg']
    session['path'] = path
    return PageTemplateFile('Products/OSA_system/zpt/addText').__of__(self)()
283:
def addText2(self):
    """Store an uploaded full-text file and register its folder.

    Creates ``<session path>/<folder_name>``, writes the uploaded file into
    it under the base name of the client-side file name, sets the folder to
    mode 0755 and the file to 0644, and adds a ``dir`` entry with content
    type ``fulltext`` to ``index.meta``.

    Returns a redirect to ``URL2?path=<session path>``.  Raises OSError if
    the folder already exists or cannot be created.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']

    # Keep only the last path component of the client-supplied name: some
    # browsers send the complete client path, and a crafted name must not
    # be able to leave the target folder.
    file_name = os.path.basename(request['fileupload'].filename.replace("\\", "/"))
    filedata = request.form['fileupload'].read()

    target_dir = path + "/" + folder_name
    target_file = target_dir + "/" + file_name

    os.mkdir(target_dir)
    # Binary mode: the upload is written back exactly as it was received.
    f = open(target_file, "wb")
    try:
        f.write(filedata)
    finally:
        f.close()

    os.chmod(target_dir, 0o755)
    os.chmod(target_file, 0o644)
    addDirsToIndexMeta(path, folder_name, content_description, 'fulltext')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
300:
def addTextExternal(self, path, texturl, version):
    """Add an external text to the resource at *path*.

    Downloads *texturl*, stores it in a new ``fulltextN`` folder below
    *path* (file mode 0644), adds a ``dir`` entry for that folder to
    ``index.meta`` (description ``archimedes text:<version>``, content type
    ``fulltext``), points the ``text`` element of the existing ``texttool``
    element at the new file and finally calls the registration CGI.

    Returns the lines answered by the registration CGI, or a string starting
    with ``ERROR`` when the text cannot be read, is rejected by
    ``TextExternalError``, cannot be written, or no ``texttool`` element
    exists yet.  In the last case the text has already been stored and
    listed in ``index.meta``.
    """
    # Read the new text version.
    try:
        text = urllib.urlopen(texturl).read()
    except Exception:
        return "ERROR: cannot read: %s" % texturl
    if TextExternalError(text):
        # No XML header.  The original returned a tuple here; every other
        # error path returns a string, so this one does now too.
        return "ERROR: cannot read: %s received: %s" % (texturl, text)

    textpath = getNewTextPath(path)  # creates the new folder for the text
    name = texturl.split("/")[-1]    # name of the XML file
    target = path + "/" + textpath + "/" + name
    try:
        writefile = open(target, "w")
    except Exception:
        return "ERROR: cannot write: %s" % target
    try:
        writefile.write(text)
    finally:
        writefile.close()
    os.chmod(target, 0o644)

    # Add the new folder to index.meta; this is the same <dir> entry the
    # original built by hand here.
    addDirsToIndexMeta(path, textpath, 'archimedes text:' + version, 'fulltext')

    # Change the texttool tag.
    dom = xml.dom.minidom.parse(path + "/index.meta")
    node = dom.getElementsByTagName('meta')[0]

    texttools = node.getElementsByTagName('texttool')
    if not texttools:
        return "ERROR:no presentation configured yet, user Web Front End to do so!"
    subnode = texttools[0]

    # Remove an existing <text> entry.  Only a direct child can be removed;
    # the original tried anyway and swallowed the resulting error.
    old_texts = subnode.getElementsByTagName('text')
    if old_texts and old_texts[0].parentNode is subnode:
        subnode.removeChild(old_texts[0]).unlink()

    # Create the <text> entry anew.
    textfoldernode = dom.createElement('text')
    textfoldernode.appendChild(dom.createTextNode(textpath + "/" + name))
    subnode.appendChild(textfoldernode)

    # Serialise before opening the file so that a failure cannot leave a
    # truncated index.meta behind.
    serialized = dom.toxml().encode('utf-8')
    writefile = open(path + "/index.meta", "wb")
    try:
        writefile.write(serialized)
    finally:
        writefile.close()

    # Register; the path is URL-quoted so that special characters survive.
    return urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s" % urllib.quote(path)).readlines()
381:
382:
383:
def TextExternalError(text):
    """Return 1 if *text* does not look like an XML document, else 0.

    Only the first ten characters are inspected; they must contain the
    start of an XML declaration (``<?xml``).
    """
    firsts = text[0:10]
    # The original used re.search() inside try/except, but a failed search
    # returns None instead of raising, and the pattern made the '<'
    # optional, so every text was accepted.
    if firsts.find("<?xml") == -1:
        return 1
    return 0
392:
def getNewTextPath(path):
    """Create the first unused ``fulltextN`` folder below *path*.

    N starts at 1.  The folder is created with mode 0755 and its name
    (without *path*) is returned.
    """
    counter = 1
    name = "fulltext%i" % counter
    while os.path.exists(path + "/" + name):
        counter += 1
        name = "fulltext%i" % counter

    target = path + "/" + name
    os.mkdir(target)
    os.chmod(target, 0o755)
    return name
400:
def addImages(self, path):
    """Prepare adding an image folder to the resource at *path*.

    Stores *path* and the list of existing names in the session and returns
    the rendered ``OSAS_addImages`` page template.
    """
    session = self.REQUEST.SESSION
    # To be done: generate the list of existing page-image folders; for now
    # the list is fixed.
    session['existing_names'] = ['pageimg']
    session['path'] = path
    return PageTemplateFile('Products/OSA_system/zpt/OSAS_addImages').__of__(self)()
407:
def addImages2(self):
    """Register a new image folder in index.meta and show the upload page.

    Takes ``folder_name`` and ``content_description`` from the request, adds
    a ``dir`` entry with content type ``images`` to the ``index.meta`` below
    the session's ``path``, then reduces the session path to the part that
    follows ``/mpiwg`` and returns the rendered ``OSAS_upload2`` template.
    """
    request = self.REQUEST
    session = request.SESSION

    self.image_folder_name = request['folder_name']
    self.content_description = request['content_description']
    self.content_type = 'images'

    # index.meta must be updated while the session still holds the full
    # path; the path is shortened only afterwards.
    addDirsToIndexMeta(session['path'],
                       self.image_folder_name,
                       self.content_description,
                       self.content_type)
    session['path'] = re.search(r"/mpiwg(.*)", session['path']).group(1)

    return PageTemplateFile('Products/OSA_system/zpt/OSAS_upload2').__of__(self)()
421:
422:
423:
def addDirsToIndexMeta(path, folder_name, content_description, content_type):
    """Append a ``dir`` entry to the first ``resource`` element of index.meta.

    path                -- folder that contains ``index.meta``
    folder_name         -- text of the entry's ``name`` element
    content_description -- text of the entry's ``description`` element
    content_type        -- text of the entry's ``content-type`` element

    The file is rewritten UTF-8 encoded.  Raises IndexError when the
    document has no ``resource`` element; in that case, and on any other
    failure before writing, the existing file is left untouched.
    """
    meta_file = path + "/index.meta"
    dom = xml.dom.minidom.parse(meta_file)
    try:
        resource = dom.getElementsByTagName('resource')[0]

        entry = dom.createElement('dir')
        children = (('name', folder_name),
                    ('description', content_description),
                    ('content-type', content_type))
        for tag, text in children:
            child = dom.createElement(tag)
            child.appendChild(dom.createTextNode(text))
            entry.appendChild(child)
        resource.appendChild(entry)

        # Serialise before the file is opened for writing: the original
        # truncated index.meta first, so a failure here destroyed it.
        serialized = dom.toxml().encode('utf-8')
    finally:
        dom.unlink()

    writefile = open(meta_file, "wb")
    try:
        writefile.write(serialized)
    finally:
        writefile.close()
451:
def readArchimedesXML(folder):
    """Look up *folder* in the Archimedes corpus manifest.

    The manifest is fetched from the Archimedes server; every ``item``
    element that has both a ``dir`` and an ``xml`` attribute contributes one
    entry.  Returns the ``xml`` attribute of the item whose ``dir`` equals
    *folder*, or an empty string if there is none.
    """
    manifest = urllib.urlopen("http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpusmanifest").read()
    dom = xml.dom.minidom.parseString(manifest)

    xml_by_dir = {}
    for item in dom.getElementsByTagName('item'):
        # Items lacking one of the two attributes are skipped.
        if item.hasAttribute('dir') and item.hasAttribute('xml'):
            xml_by_dir[item.getAttribute('dir')] = item.getAttribute('xml')

    return xml_by_dir.get(folder, "")
472:
473:
474:
475:
def _append_text_element(dom, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = dom.createElement(tag)
    element.appendChild(dom.createTextNode(text))
    parent.appendChild(element)
    return element


def _shell_quote(arg):
    """Quote *arg* so that a POSIX shell reads it as one literal word."""
    return "'" + arg.replace("'", "'\\''") + "'"


def combineTextImage2(self, path):
    """Create or replace the ``texttool`` meta tag of ``path/index.meta``.

    A fresh ``texttool`` element is built from the request: ``display`` is
    always ``yes``; ``image``, ``text``, ``external``, ``pagebreak`` and
    ``presentation`` are used when present; ``xslt``, ``thumbtemplate``,
    ``topbar`` (written as ``toptemplate``), ``startpage`` and ``project``
    when present and not empty.  For ``text`` and ``presentation`` the
    first file listed in the named folder is referenced.

    Existing ``lang`` children of the first ``bib`` and the first
    ``resource`` element are removed; when the request has ``lang`` a new
    ``lang`` element is added to ``bib`` if there is one, else to
    ``resource``.

    Afterwards ``index.meta`` is rewritten, the registration CGI is called
    and, when an image folder was given, thumbnail scaling is started on the
    remote host.  Returns None.
    """
    request = self.REQUEST

    dom = xml.dom.minidom.parse(path + "/index.meta")
    node = dom.getElementsByTagName('meta')[0]

    # An existing texttool tag is deleted and created anew.
    subnodelist = node.getElementsByTagName('texttool')
    if subnodelist:
        node.removeChild(subnodelist[0]).unlink()
    subnode = dom.createElement('texttool')

    _append_text_element(dom, subnode, 'display', 'yes')

    if request.has_key('image'):
        _append_text_element(dom, subnode, 'image', request['image'])

    if request.has_key('text'):
        textfile = os.listdir(path + "/" + request['text'])[0]
        _append_text_element(dom, subnode, 'text',
                             path + "/" + request['text'] + "/" + textfile)

    if request.has_key('external'):
        # Use the external (CVS) text instead of a local one.
        _append_text_element(dom, subnode, 'text', request.SESSION['externxml'])

    if request.has_key('pagebreak'):
        _append_text_element(dom, subnode, 'pagebreak', request['pagebreak'])

    if request.has_key('presentation'):
        # Looked up only when a presentation was submitted; the original
        # read the field unconditionally and failed without it.
        presentfile = os.listdir(path + "/" + request['presentation'])[0]
        _append_text_element(dom, subnode, 'presentation',
                             request['presentation'] + "/" + presentfile)

    # Optional settings: (request field, element name); empty values are
    # not written.
    optional = (('xslt', 'xslt'),
                ('thumbtemplate', 'thumbtemplate'),
                ('topbar', 'toptemplate'),
                ('startpage', 'startpage'),
                ('project', 'project'))
    for field, tag in optional:
        if request.has_key(field) and not request[field] == "":
            _append_text_element(dom, subnode, tag, request[field])

    node.appendChild(subnode)

    # Language: prefer <bib>, fall back to <resource>.  The original meant
    # to do the same, but a leftover debug print of an undefined name made
    # the <bib> branch fail every time, so the element always ended up in
    # <resource>.  Stale entries are therefore removed from both places.
    holders = []
    bibs = dom.getElementsByTagName('bib')
    if bibs:
        holders.append(bibs[0])
    resources = dom.getElementsByTagName('resource')
    if resources:
        holders.append(resources[0])
    for holder in holders:
        for old in holder.getElementsByTagName('lang'):
            if old.parentNode is holder:
                holder.removeChild(old)
    if holders and request.has_key('lang'):
        _append_text_element(dom, holders[0], 'lang', request['lang'])

    # Serialise before opening the file so that a failure cannot leave a
    # truncated index.meta behind.
    serialized = dom.toxml().encode('utf-8')
    writefile = open(path + "/index.meta", "wb")
    try:
        writefile.write(serialized)
    finally:
        writefile.close()

    # Register; the path is URL-quoted so that special characters survive.
    urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s" % urllib.quote(path)).readlines()

    # Start thumbnail scaling in the background.  Request values are quoted
    # twice: once for the remote shell and once for the local shell that
    # starts ssh.  Without an image folder there is nothing to scale; the
    # original raised KeyError at this point.
    if request.has_key('image'):
        image_dir = re.sub('/mpiwg/online/', '', request['path'] + "/" + request['image'])
        remote_command = "/usr/local/mpiwg/scripts/scaleomat.pl %s /mpiwg/temp/online/scaled/thumb 90" % _shell_quote(image_dir)
        os.popen("ssh nausikaa2.rz-berlin.mpg.de %s >> /tmp/sc.out &" % _shell_quote(remote_command))
611:
612: #return self.REQUEST.RESPONSE.redirect(self.REQUEST['URL2']+'?path='+self.REQUEST['path'])
613:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>