1: #BAUSTELLE
2:
3:
4: """Methoden zum hinzufügen von Dokumenten ins Archiv"""
5: from OSAS_helpers import readArchimedesXML
6: import archive
7: import os
8: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
9: from Products.PageTemplates.PageTemplate import PageTemplate
10: import string
11: import urllib
12: import xml.dom.minidom
13: from time import localtime,strftime
14:
15: #referencetypes=['Book','Bic','Journal Article','Manuscript','jiji']
16:
17: import re
def add(self, no_upload=0):
    """Start the "add to repository" dialogue and render its first page.

    Copies the target path from the request into the session and records
    in the session whether the later steps include a document upload.

    no_upload -- 0 (default): metadata only, no document upload; the
                 session flag 'no_upload' is set to 'yes'.
                 Any other value: documents are uploaded too, so a
                 left-over 'no_upload' flag is removed from the session.

    Returns the rendered OSAS_add_new page template.
    """
    # Reference types offered for selection; stored on self for the template.
    self.referencetypes = ['Book', 'Journal Article', 'Manuscript']

    session = self.REQUEST.SESSION
    session['path'] = self.REQUEST['path']
    if no_upload == 0:
        session['no_upload'] = 'yes'
    elif session.has_key('no_upload'):
        del session['no_upload']

    newtemplate = PageTemplateFile('Products/OSA_system/zpt/OSAS_add_new').__of__(self)
    # The original had a second, unreachable "return self.referencetypes"
    # after this statement; it has been removed.
    return newtemplate()
34:
35:
36:
def getISO(path='/usr/local/mpiwg/Zope/lib/python/Products/OSA_system/iso639-1.inc'):
    """Read a tab-separated table and return it as a dictionary.

    Every line is split at tab characters; the SECOND column becomes the
    key and the FIRST column the value (judging by the default file name
    this is presumably "language name -> ISO 639-1 code" -- confirm
    against the data file).

    path -- file to read; defaults to the installed iso639-1.inc, so
            existing callers that pass no argument are unaffected.

    Returns {} when the file cannot be read.  Lines with fewer than two
    columns are skipped (previously a single such line discarded the
    whole table).  Non-ASCII bytes are replaced by '?', which is what the
    original 'replace' error handler was meant to do; on a Python 2 byte
    string it raised instead and emptied the result.

    Note: the line ending is not stripped, so for a two-column file the
    keys keep their trailing newline, exactly as before.
    """
    try:
        handle = open(path, 'rb')
    except (IOError, OSError):
        return {}
    try:
        rawlines = handle.readlines()
    finally:
        handle.close()

    ret = {}
    for raw in rawlines:
        # bytes -> text with replacement -> pure ASCII native string
        # (works for Python 2 byte strings and Python 3 bytes alike).
        ascii_bytes = raw.decode('ascii', 'replace').encode('ascii', 'replace')
        line = str(ascii_bytes.decode('ascii'))
        columns = line.split('\t')
        if len(columns) < 2:
            continue
        ret[columns[1]] = columns[0]
    return ret
51:
def add2(self):
    """Second step: store the chosen reference type and render the bibliographic form.

    Reads 'Reference Type' from the request, stores it on self and in
    the session, selects the matching list of bibliographic field names
    and provides the ISO table (self.isolist) together with its sorted
    keys (self.isokeys) for the template.

    Returns the rendered OSAS_add_bibdata page template.
    Raises KeyError if the submitted reference type is not one of the
    keys of self.bibdata.
    """
    self.reftype = self.REQUEST['Reference Type']
    self.REQUEST.SESSION['reftype'] = self.reftype

    # Field names offered for each reference type.
    self.bibdata = {
        'Book': ['author', 'year', 'title', 'series editor', 'series title',
                 'series volume', 'number of pages', 'city', 'publisher',
                 'edition', 'number of volumes', 'translator', 'ISBN ISSN'],
        'Journal Article': ['author', 'year', 'title', 'journal', 'volume',
                            'issue', 'pages', 'alternate journal',
                            'call number'],
        'Manuscript': ['author', 'year', 'title', 'location', 'signature',
                       'pages', 'editorial remarks', 'description',
                       'keywords'],
    }
    self.fields = self.bibdata[self.reftype]

    # Read the ISO table once; the original called getISO() a second
    # time just to obtain the keys of the very same data.
    self.isolist = getISO()
    isokeys = list(self.isolist.keys())
    isokeys.sort()
    self.isokeys = isokeys

    newtemplate = PageTemplateFile('Products/OSA_system/zpt/OSAS_add_bibdata').__of__(self)
    return newtemplate()
72:
73:
def parse_query_string(str):
    """Parse a URL query string into a dictionary of decoded key/value pairs.

    str -- the raw query string, e.g. "author=Galilei&title=Two+New+Sciences".
           (The parameter name shadows the builtin; it is kept because it
           is part of the existing interface.)

    Returns a dict.  In keys every '+' becomes '-'; in values every '+'
    becomes a space.  Percent escapes are decoded in both.

    Differences to the previous character-by-character parser, all of
    which were defects:
      * pairs are split BEFORE percent-decoding, so an encoded '&' or '='
        (%26, %3D) inside a value no longer breaks the parse, and an
        encoded '+' (%2B) stays a plus sign;
      * only the first '=' separates key and value ("k=a=b" -> k: "a=b");
      * empty segments (empty query, trailing or doubled '&') are
        ignored instead of overwriting the previous value or creating
        an entry with an empty key;
      * a segment without '=' yields that key with an empty value.
    """
    try:
        from urllib import unquote          # Python 2
    except ImportError:
        from urllib.parse import unquote    # Python 3

    query = str
    queries = {}
    for pair in query.split("&"):
        if not pair:
            continue
        rawkey, _separator, rawvalue = pair.partition("=")
        key = unquote(rawkey.replace("+", "-"))
        value = unquote(rawvalue.replace("+", " "))
        queries[key] = value
    return queries
98:
def add3(self):
    """Third step: keep the bibliographic data and propose a folder name.

    The query string is parsed into a metadata dictionary that is stored
    in the session.  A name proposal "<author[:5]>_<title[:5]>_<year>"
    (non-ASCII characters dropped) is stored in self.vorschlag_naming.

    Without upload (session flag 'no_upload' set) the current session
    path is used as folder name and processing continues directly with
    add5(); otherwise the OSAS_add_naming page is rendered.
    """
    request = self.REQUEST
    session = request.SESSION

    bibfields = parse_query_string(request['QUERY_STRING'])
    session['metadata'] = bibfields

    proposal = "_".join([bibfields['author'][:5],
                         bibfields['title'][:5],
                         bibfields['year']])
    self.vorschlag_naming = proposal.decode('ascii', 'ignore')

    if not session.has_key('no_upload'):
        namingpage = PageTemplateFile('Products/OSA_system/zpt/OSAS_add_naming').__of__(self)
        return namingpage()

    session['folder_name'] = session['path']
    return add5(self)
111:
112:
def add4(self):
    """Fourth step: fix the folder name and render the upload page.

    self.path is set to the part of the session path that follows
    "/mpiwg".  The folder name from the request is appended to the
    session path and stored on self and in the session; the image folder
    name is fixed to "pageimg".

    Returns the rendered OSAS_upload page template.
    Raises AttributeError if the session path does not contain "/mpiwg".
    """
    request = self.REQUEST
    session = request.SESSION

    found = re.search(r"/mpiwg(.*)", session['path'])
    self.path = found.group(1)

    self.folder_name = request['folder_name']
    # Known problem (noted by the original author): this has to be
    # changed -- using the browser's back button appends the folder
    # name a second time.
    session['path'] = "%s/%s" % (session['path'], request['folder_name'])

    session['folder_name'] = self.folder_name
    self.image_folder_name = "pageimg"

    uploadpage = PageTemplateFile('Products/OSA_system/zpt/OSAS_upload').__of__(self)
    return uploadpage()
125:
def add5(self):
    """Fifth step (add index.meta): render the OSAS_add_metadata form."""
    metadatapage = PageTemplateFile('Products/OSA_system/zpt/OSAS_add_metadata')
    return metadatapage.__of__(self)()
130:
def _chmod_tree(top, mode):
    """Set mode on top and on everything below it; entries that cannot be changed are skipped."""
    targets = [top]
    for dirpath, dirnames, filenames in os.walk(top):
        for entry in dirnames + filenames:
            candidate = os.path.join(dirpath, entry)
            # do not change the targets of symbolic links found in the tree
            if not os.path.islink(candidate):
                targets.append(candidate)
    for target in targets:
        try:
            os.chmod(target, mode)
        except OSError:
            pass  # best effort, like the shell "chmod -R" this replaces


def add6(self):
    """Final step: write index.meta for the resource and show the result.

    The query string supplies the resource-level metadata; 'archive-path'
    and 'folder_name' are derived from the session path and
    'content-type' is fixed to "scanned document".  The bibliographic
    data stored in the session by add3 is rendered as a sequence of
    "<tag>value</tag>" lines into self.metadataprint.  The index_meta
    template is rendered and written to <session path>/index.meta.

    Returns a redirect to URL2 (no upload) or the rendered OSAS_saved
    page (upload).

    Changes against the previous version:
      * values in self.metadataprint are XML-escaped, so '&' or '<' in a
        title no longer produces malformed XML (assumes the template
        inserts metadataprint unescaped -- TODO confirm in index_meta);
      * the recursive permission change is done with os.chmod instead of
        a shell command built from the request-derived path;
      * the file is closed even if writing fails.
    """
    from xml.sax.saxutils import escape  # not among the file-level imports

    request = self.REQUEST
    session = request.SESSION

    metadata = parse_query_string(request['QUERY_STRING'])
    archive_path, folder_name = os.path.split(session['path'])
    metadata['archive-path'] = archive_path
    metadata['folder_name'] = folder_name
    metadata['content-type'] = "scanned document"

    self.reftype = session['reftype']
    session['add_metadata'] = metadata
    self.add_metadata = metadata
    self.metadata = session['metadata']
    self.metadataprint = "".join(
        ["<" + tag + ">" + escape(self.metadata[tag]) + "</" + tag + ">\n"
         for tag in self.metadata.keys()])

    newtemplate = PageTemplateFile('Products/OSA_system/zpt/index_meta').__of__(self)
    newtemplate.content_type = "text/plain"
    renderxml = newtemplate()

    resource_dir = archive_path + "/" + folder_name
    if session.has_key('no_upload'):
        metapath = session['path'] + "/index.meta"
    else:
        metapath = resource_dir + "/index.meta"

    f = open(metapath, 'w')
    try:
        f.write(renderxml)
    finally:
        f.close()
    os.chmod(metapath, 0o664)
    # As before, the recursive change also covers index.meta itself, so
    # it ends up with mode 0775.
    _chmod_tree(resource_dir, 0o775)

    if session.has_key('no_upload'):
        return request.response.redirect(request['URL2'] + "?path=" + session['path'])

    # Path below /mpiwg/online/, used by the OSAS_saved template.
    self.viewpath = re.search(r"/mpiwg/online/(.*)", archive_path).group(1)
    newtemplate2 = PageTemplateFile('Products/OSA_system/zpt/OSAS_saved').__of__(self)
    newtemplate2.content_type = "text/html"
    request.response.setHeader('Content-Type', 'text/html')
    return newtemplate2()
173:
174:
175:
176:
def date(self):
    """Return the current local date formatted as DD.MM.YYYY."""
    now = localtime()
    return strftime("%d.%m.%Y", now)
179:
180:
def _firstTagText(dom, tagnames):
    """Return the text of the first element found for any of tagnames, tried in the given order."""
    for tagname in tagnames:
        nodes = dom.getElementsByTagName(tagname)
        if nodes:
            return archive.getText(nodes[0].childNodes)
    raise IndexError("index.meta contains none of the tags: %s" % ", ".join(tagnames))


def addPresentation(self, path):
    """Prepare adding a presentation to the resource at path.

    Reads author, title and year from <path>/index.meta (with fallbacks
    for the tag names Author/Editor and Year/date), chooses the first
    unused folder name of the form "NN-presentation" and stores that
    name, the path and a proposed info XML in the session.

    Returns the rendered addPresentation page template.
    Raises IndexError if none of the expected tags is present.

    Changes against the previous version:
      * the search for an unused folder name now looks inside path; it
        used to test the name relative to the process working directory;
      * text placed into the proposed XML is escaped;
      * tag fallbacks no longer rely on bare "except:" clauses.
    """
    from xml.sax.saxutils import escape  # not among the file-level imports

    dom = xml.dom.minidom.parse(path + "/index.meta")

    author = _firstTagText(dom, ('author', 'Author', 'Editor'))
    title = _firstTagText(dom, ('title',))
    year = _firstTagText(dom, ('year', 'Year', 'date'))

    i = 1
    while os.path.exists(os.path.join(path, "%02d-presentation" % i)):
        i += 1

    session = self.REQUEST.SESSION
    session['presentationname'] = "%02d-presentation" % i
    session['path'] = path
    session['xmlvorschlag'] = """<info>
<author>%s</author>
<title>%s</title>
<date>%s</date>
<display>yes</display>
</info>""" % (escape(author), escape(title), escape(year))

    newtemplate = PageTemplateFile('Products/OSA_system/zpt/addPresentation').__of__(self)
    return newtemplate()
217:
def addPresentation2(self):
    """Create the presentation folder, write info.xml and register it in index.meta.

    Reads 'folder_name', 'content_description' and 'xmltext' from the
    request and the resource path from the session.  The XML text is
    written to <path>/<folder_name>/info.xml, the folder is added to
    index.meta with content type 'presentation' and the browser is
    redirected to URL2 with the path as query parameter.

    Changes against the previous version:
      * an uploaded file was read and then ignored (see the note below);
        that dead work is gone, the result is unchanged;
      * only "folder already exists" is tolerated when creating the
        folder, other errors are no longer swallowed;
      * the file is closed even if writing fails.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']

    # Workaround kept from the original: multipart evaluation only
    # worked with Mozilla, therefore an uploaded file is deliberately
    # ignored and the text field is always used.
    file_name = "info.xml"
    xmlinfo = request['xmltext']

    target_dir = path + "/" + folder_name
    try:
        os.mkdir(target_dir)
    except OSError:
        if not os.path.isdir(target_dir):
            raise

    target_file = target_dir + "/" + file_name
    f = open(target_file, "w")
    try:
        f.write(xmlinfo)
    finally:
        f.close()
    os.chmod(target_dir, 0o755)
    os.chmod(target_file, 0o644)
    addDirsToIndexMeta(path, folder_name, content_description, 'presentation')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
249:
def addText(self, path):
    """Prepare adding a full text to the resource at path.

    Stores path and the list of already used folder names in the
    session and returns the rendered addText page template.
    """
    session = self.REQUEST.SESSION
    # to be done: generate the list of existing text folders
    session['existing_names'] = ['pageimg']
    session['path'] = path
    textpage = PageTemplateFile('Products/OSA_system/zpt/addText')
    return textpage.__of__(self)()
256:
def addText2(self):
    """Store an uploaded full text in a new folder and register it in index.meta.

    Reads 'folder_name', 'content_description' and the upload
    'fileupload' from the request and the resource path from the
    session.  The folder is created (OSError if it already exists), the
    upload is written into it, the folder is added to index.meta with
    content type 'fulltext' and the browser is redirected to URL2.

    Changes against the previous version:
      * only the base name of the client-supplied file name is used, so
        directory components (e.g. "C:\\texts\\a.xml" or "../a.xml")
        can no longer end up in the target path;
      * the upload is written in binary mode and the file is closed
        even if writing fails.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']

    upload = request['fileupload']
    file_name = os.path.basename(upload.filename.replace("\\", "/"))
    filedata = request.form['fileupload'].read()

    target_dir = path + "/" + folder_name
    target_file = target_dir + "/" + file_name
    os.mkdir(target_dir)
    f = open(target_file, "wb")
    try:
        f.write(filedata)
    finally:
        f.close()
    os.chmod(target_dir, 0o755)
    os.chmod(target_file, 0o644)
    addDirsToIndexMeta(path, folder_name, content_description, 'fulltext')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
273:
def addTextExternal(self, path, texturl, version):
    """Fetch an external text and attach it to the resource at path.

    path    -- directory of the resource (contains index.meta)
    texturl -- URL the text is downloaded from; its last path segment
               becomes the file name
    version -- appended to the description "archimedes text:"

    Steps: download the text, store it in a new "fulltextN" folder, add
    that folder to index.meta, point the <text> element of the existing
    <texttool> element to the new file and finally call the registration
    CGI, whose response lines are returned.

    On failure a string starting with "ERROR" is returned instead.  Note
    that the new folder and its <dir> entry have already been written
    when the "no presentation configured" error is reported (unchanged
    behaviour).

    Changes against the previous version:
      * the "no xml header" error returned a tuple; it is now a single
        string like all other error results;
      * the <dir> entry is written by addDirsToIndexMeta instead of a
        copy of its code;
      * index.meta is serialized before the file is opened for writing,
        so a serialization error cannot leave an empty file;
      * bare "except:" clauses are narrowed.
    """
    # read the new text version
    try:
        text = urllib.urlopen(texturl).read()
    except Exception:
        # error while reading the text; reported to the caller as a string
        return "ERROR: cannot read: %s" % texturl
    if TextExternalError(text):  # no xml header
        return "ERROR: cannot read: %s received: %s" % (texturl, text)

    textpath = getNewTextPath(path)  # create a new folder for the text
    name = texturl.split("/")[-1]    # name of the XML file
    target = path + "/" + textpath + "/" + name
    try:
        writefile = open(target, "w")
    except (IOError, OSError):
        return "ERROR: cannot write: %s" % target
    try:
        writefile.write(text)
    finally:
        writefile.close()
    os.chmod(target, 0o644)

    # add the new folder to index.meta
    addDirsToIndexMeta(path, textpath, 'archimedes text:' + version, 'fulltext')

    # change the texttool tag
    dom = xml.dom.minidom.parse(path + "/index.meta")
    meta = dom.getElementsByTagName('meta')[0]
    texttools = meta.getElementsByTagName('texttool')
    if not texttools:
        return "ERROR:no presentation configured yet, user Web Front End to do so!"
    texttool = texttools[0]

    # remove an existing <text> entry (only the first one, as before)
    for oldtext in texttool.getElementsByTagName('text')[:1]:
        try:
            texttool.removeChild(oldtext).unlink()
        except xml.dom.NotFoundErr:
            pass  # not a direct child of <texttool>; left in place as before

    # create the new <text> entry
    textfoldernode = dom.createElement('text')
    textfoldernode.appendChild(dom.createTextNode(textpath + "/" + name))
    texttool.appendChild(textfoldernode)

    # write index.meta
    serialized = dom.toxml().encode('utf-8')
    writefile = open(path + "/index.meta", "wb")
    try:
        writefile.write(serialized)
    finally:
        writefile.close()

    # register
    return urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s" % path).readlines()
354:
355:
356:
def TextExternalError(text):
    """Tell whether text lacks an XML header.

    Returns 0 if "<?xml" occurs within the first ten characters of text
    and 1 otherwise.

    The previous version always returned 0: it ran a regular expression
    search but ignored the result, and the search never raises for a
    missing match.  Its pattern also treated the "<" as optional.
    """
    firsts = text[0:10]
    if "<?xml" in firsts:
        return 0
    return 1
365:
def getNewTextPath(path):
    """Create the first unused "fulltextN" folder (N = 1, 2, ...) below path.

    The folder gets mode 0755.  Returns the folder name without the
    leading path.
    """
    number = 1
    candidate = "fulltext%i" % number
    while os.path.exists(path + "/" + candidate):
        number += 1
        candidate = "fulltext%i" % number
    created = path + "/" + candidate
    os.mkdir(created)
    os.chmod(created, 0o755)
    return candidate
373:
def addImages(self, path):
    """Prepare adding an image folder to the resource at path.

    Stores path and the list of already used folder names in the
    session and returns the rendered OSAS_addImages page template.
    """
    session = self.REQUEST.SESSION
    # to be done: generate the list of existing page image folders
    session['existing_names'] = ['pageimg']
    session['path'] = path
    imagespage = PageTemplateFile('Products/OSA_system/zpt/OSAS_addImages')
    return imagespage.__of__(self)()
380:
def addImages2(self):
    """Register a new image folder in index.meta and render the upload page.

    Folder name and description come from the request.  The folder is
    added to index.meta of the session path with content type 'images';
    afterwards the session path is reduced to the part that follows
    "/mpiwg".

    Returns the rendered OSAS_upload2 page template.
    Raises AttributeError if the session path does not contain "/mpiwg".
    """
    request = self.REQUEST
    session = request.SESSION

    self.image_folder_name = request['folder_name']
    self.content_description = request['content_description']
    self.content_type = 'images'

    addDirsToIndexMeta(session['path'],
                       self.image_folder_name,
                       self.content_description,
                       self.content_type)

    found = re.search(r"/mpiwg(.*)", session['path'])
    session['path'] = found.group(1)

    uploadpage = PageTemplateFile('Products/OSA_system/zpt/OSAS_upload2').__of__(self)
    return uploadpage()
394:
395:
396:
def addDirsToIndexMeta(path, folder_name, content_description, content_type):
    """Append a <dir> entry to the first <resource> element of <path>/index.meta.

    The new element has the children <name>, <description> and
    <content-type> (in this order) holding the three given texts.  The
    file is rewritten as UTF-8.

    Raises IndexError if index.meta has no <resource> element.

    Changes against the previous version: the document is serialized
    BEFORE index.meta is opened for writing, so a serialization error
    can no longer leave a truncated file, and the file is closed even if
    writing fails.
    """
    metafile = path + "/index.meta"
    dom = xml.dom.minidom.parse(metafile)
    resource = dom.getElementsByTagName('resource')[0]

    direntry = dom.createElement('dir')
    children = (('name', folder_name),
                ('description', content_description),
                ('content-type', content_type))
    for tagname, text in children:
        child = dom.createElement(tagname)
        child.appendChild(dom.createTextNode(text))
        direntry.appendChild(child)
    resource.appendChild(direntry)

    serialized = dom.toxml().encode('utf-8')
    writefile = open(metafile, "wb")
    try:
        writefile.write(serialized)
    finally:
        writefile.close()
424:
def readArchimedesXML(folder):
    """Look up the XML value for folder in the Archimedes corpus manifest.

    Downloads the manifest from the Archimedes server, collects for
    every <item> element the attributes 'dir' and 'xml' and returns the
    'xml' value of the item whose 'dir' equals folder, or "" if there is
    none.  If several items share the same 'dir', the last one wins.

    Changes against the previous version: only a missing attribute
    (KeyError) makes an item being skipped instead of any exception, and
    the local dictionary no longer shadows the builtin "dict".
    """
    manifest = urllib.urlopen("http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpusmanifest").read()
    dom = xml.dom.minidom.parseString(manifest)

    xml_by_dir = {}
    for item in dom.getElementsByTagName('item'):
        attributes = item.attributes
        try:
            xml_by_dir[attributes['dir'].value] = attributes['xml'].value
        except KeyError:
            continue  # item without 'dir' or 'xml' attribute

    return xml_by_dir.get(folder, "")
445:
446:
447:
448:
def combineTextImage2(self, path):
    """Create or replace the <texttool> element in <path>/index.meta.

    The element is built from the request: <display>yes</display> is
    always added; 'image', 'text', 'external', 'pagebreak' and
    'presentation' are added when the key is present; 'xslt',
    'thumbtemplate', 'topbar' (written as <toptemplate>), 'startpage',
    'project' and 'digiLibTemplate' (written as <digilibtemplate>) are
    added when present and not empty.  For 'text' and 'presentation' the
    first file found in the named folder is referenced.

    Existing <lang> elements directly below <bib> and <resource> are
    removed; if the request has 'lang', a new <lang> element is added to
    the first <bib> element, or to <resource> if there is no <bib>.

    Afterwards index.meta is rewritten, the registration CGI is called
    and, if an image folder was given, thumbnail scaling is started on
    the remote host.  Returns None.

    Changes against the previous version:
      * a debug print referenced the undefined name "lang"; the
        resulting NameError was swallowed, so <lang> was never written
        to <bib> but always to <resource>.  NOTE(review): readers of
        index.meta that rely on <resource>/<lang> need checking;
      * 'presentation' and 'image' were read unconditionally although
        the code treats both as optional;
      * the argument of the remote scaling command is shell-quoted (for
        the local and the remote shell);
      * index.meta is serialized before it is opened for writing;
      * debug prints are removed.
    """
    try:
        from shlex import quote as shell_quote   # Python 3.3+
    except ImportError:
        from pipes import quote as shell_quote   # Python 2

    request = self.REQUEST
    dom = xml.dom.minidom.parse(path + "/index.meta")
    meta = dom.getElementsByTagName('meta')[0]

    def append_text_element(parent, tagname, value):
        # <tagname>value</tagname> appended to parent
        element = dom.createElement(tagname)
        element.appendChild(dom.createTextNode(value))
        parent.appendChild(element)

    def remove_direct_children(parent, tagname):
        for child in list(parent.childNodes):
            if child.nodeType == child.ELEMENT_NODE and child.tagName == tagname:
                parent.removeChild(child).unlink()

    # an existing texttool element is deleted and created anew
    existing = meta.getElementsByTagName('texttool')
    if existing:
        meta.removeChild(existing[0]).unlink()
    texttool = dom.createElement('texttool')

    append_text_element(texttool, 'display', 'yes')

    if request.has_key('image'):
        append_text_element(texttool, 'image', request['image'])

    if request.has_key('text'):
        textdir = path + "/" + request['text']
        textfile = os.listdir(textdir)[0]
        append_text_element(texttool, 'text', textdir + "/" + textfile)

    if request.has_key('external'):  # use CVS instead of local text
        append_text_element(texttool, 'text', request.SESSION['externxml'])

    if request.has_key('pagebreak'):
        append_text_element(texttool, 'pagebreak', request['pagebreak'])

    if request.has_key('presentation'):
        presentfile = os.listdir(path + "/" + request['presentation'])[0]
        append_text_element(texttool, 'presentation',
                            request['presentation'] + "/" + presentfile)

    # (request key, element name) -- written only when not empty
    optional = (('xslt', 'xslt'),
                ('thumbtemplate', 'thumbtemplate'),
                ('topbar', 'toptemplate'),
                ('startpage', 'startpage'),
                ('project', 'project'),
                ('digiLibTemplate', 'digilibtemplate'))
    for key, tagname in optional:
        if request.has_key(key) and not request[key] == "":
            append_text_element(texttool, tagname, request[key])

    meta.appendChild(texttool)

    # language: drop old entries, then write the new one
    bibs = dom.getElementsByTagName('bib')
    resources = dom.getElementsByTagName('resource')
    for container in list(meta.getElementsByTagName('bib')[:1]) + list(resources[:1]):
        remove_direct_children(container, 'lang')
    if request.has_key('lang'):
        targets = bibs or resources
        if targets:
            append_text_element(targets[0], 'lang', request['lang'])

    serialized = dom.toxml().encode('utf-8')
    writefile = open(path + "/index.meta", "wb")
    try:
        writefile.write(serialized)
    finally:
        writefile.close()

    urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s" % path).readlines()

    if request.has_key('image'):
        imagepath = re.sub('/mpiwg/online/', '', request['path'] + "/" + request['image'])
        # Quoted twice: once for the local shell started by os.popen and
        # once for the shell that ssh starts on the remote host.  Plain
        # paths pass through unchanged.
        remote_argument = shell_quote(shell_quote(imagepath))
        os.popen("ssh nausikaa2.rz-berlin.mpg.de /usr/local/mpiwg/scripts/scaleomat.pl %s /mpiwg/temp/online/scaled/thumb 90 >> /tmp/sc.out &" % remote_argument)
593:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>