# BAUSTELLE (work in progress)
2:
3:
"""Methods for adding documents to the archive."""
5: import archive
6: import os
7: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
8: from Products.PageTemplates.PageTemplate import PageTemplate
9: import string
10: import urllib
11: import xml.dom.minidom
12:
13: #referencetypes=['Book','Bic','Journal Article','Manuscript','jiji']
14:
15: import re
def add(self, no_upload=0):
    """Start the add workflow and render the reference type selection page.

    Stores the reference types on ``self``, copies the request's ``path``
    into the session and maintains the session flag ``no_upload``.

    no_upload -- 0 (the default) sets the session flag ``no_upload`` to
                 'yes' (metadata only, no document upload); any other value
                 removes the flag if it is present.

    Returns the rendered ``OSAS_add_new`` page template.
    """
    self.referencetypes = ['Book', 'Journal Article', 'Manuscript']

    newtemplate = PageTemplateFile('Products/OSA_system/OSAS_add_new').__of__(self)

    session = self.REQUEST.SESSION
    session['path'] = self.REQUEST['path']
    if no_upload == 0:
        session['no_upload'] = 'yes'
    elif session.has_key('no_upload'):
        del session['no_upload']

    # The original had a second, unreachable "return self.referencetypes"
    # after this statement; it has been dropped.
    return newtemplate()
32:
33:
34:
def getISO(path='/usr/local/mpiwg/Zope/lib/python/Products/OSA_system/iso639-1.inc'):
    """Read the tab separated ISO 639-1 table and return it as a dictionary.

    Each line is split at tab characters; the second column becomes the key
    and the first column the value.  Columns are stored exactly as split, so
    a key taken from the last column of a line keeps its line terminator.

    path -- location of the table; defaults to the installed
            ``iso639-1.inc`` so existing callers can keep calling
            ``getISO()`` without arguments.

    Returns a dict of byte strings (plain ``str`` on Python 2).  An
    unreadable file yields an empty dict.  Lines without a tab are skipped
    and non-ASCII bytes are replaced by '?'; previously either case
    discarded the whole table.
    """
    ret = {}
    try:
        # The context manager closes the handle even if reading fails.
        with open(path, 'rb') as isofile:
            rawlines = isofile.readlines()
    except (IOError, OSError):
        return ret

    for rawline in rawlines:
        # Decode first so that 'replace' really substitutes foreign bytes.
        line = rawline.decode('ascii', 'replace').encode('ascii', 'replace')
        columns = line.split(b'\t')
        if len(columns) < 2:
            continue
        ret[columns[1]] = columns[0]
    return ret
49:
def add2(self):
    """Render the bibliographic data form for the chosen reference type.

    Reads ``Reference Type`` from the request, stores it on ``self`` and in
    the session, and stores on ``self`` the field list for that type plus
    the ISO language table and its sorted keys.

    Raises KeyError if the reference type is not a key of ``self.bibdata``.
    Returns the rendered ``OSAS_add_bibdata`` page template.
    """
    self.reftype = self.REQUEST['Reference Type']
    self.REQUEST.SESSION['reftype'] = self.reftype
    self.bibdata = {
        'Book': ['author', 'year', 'title', 'series editor', 'series title',
                 'series volume', 'number of pages', 'city', 'publisher',
                 'edition', 'number of volumes', 'translator', 'ISBN ISSN'],
        'Journal Article': ['author', 'year', 'title', 'journal', 'volume',
                            'issue', 'pages', 'alternate journal',
                            'call number'],
        'Manuscript': ['author', 'year', 'title', 'location', 'signature',
                       'pages', 'editorial remarks', 'description',
                       'keywords'],
    }

    self.fields = self.bibdata[self.reftype]

    # Read the language table once; it was previously read and parsed twice.
    isolist = getISO()
    self.isolist = isolist
    self.isokeys = sorted(isolist.keys())

    newtemplate = PageTemplateFile('Products/OSA_system/OSAS_add_bibdata').__of__(self)
    return newtemplate()
70:
71:
def parse_query_string(str):
    """Parse a URL query string into a dictionary.

    str -- the raw query string, e.g. ``author=Galileo+Galilei&year=1610``
           (the parameter name is kept for backward compatibility).

    The string is split at '&' and each pair at its first '='.  In keys a
    '+' becomes '-', in values a '+' becomes a blank; percent escapes are
    decoded afterwards.  If a key occurs more than once the last value wins.
    A pair without '=' is stored with an empty value.  Empty pairs (empty
    input, a trailing '&', '&&') are ignored.

    Differences from the previous character loop, which unquoted first:
    an escaped '&', '=' or '+' inside a key or value no longer breaks the
    parsing, a trailing '&' no longer overwrites the last value with '',
    and empty input returns {} instead of {'': ''}.
    """
    # Function-scope import so the helper works on Python 2 and Python 3.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote

    queries = {}
    for pair in str.split("&"):
        if not pair:
            continue
        if "=" in pair:
            key, value = pair.split("=", 1)
        else:
            key, value = pair, ""
        # Translate '+' before unquoting so that an escaped plus (%2B)
        # survives as a literal '+'.
        queries[unquote(key.replace("+", "-"))] = unquote(value.replace("+", " "))
    return queries
96:
def add3(self):
    """Store the bibliographic data and propose a folder name.

    Parses the request's query string, keeps the result in the session as
    ``metadata`` and stores on ``self`` a name suggestion built from the
    first five characters of author and title plus the year.

    If the session flag ``no_upload`` is set, the session's ``folder_name``
    is set to the session ``path`` and the result of ``add5`` is returned;
    otherwise the rendered ``OSAS_add_naming`` page template is returned.
    """
    request = self.REQUEST
    session = request.SESSION

    bibfields = parse_query_string(request['QUERY_STRING'])
    session['metadata'] = bibfields

    suggestion = "_".join([bibfields['author'][:5],
                           bibfields['title'][:5],
                           bibfields['year']])
    self.vorschlag_naming = suggestion.decode('ascii', 'ignore')

    if not session.has_key('no_upload'):
        naming_page = PageTemplateFile('Products/OSA_system/OSAS_add_naming').__of__(self)
        return naming_page()

    session['folder_name'] = session['path']
    return add5(self)
109:
110:
def add4(self):
    """Record the chosen folder name and render the upload page.

    Stores on ``self`` the part of the session ``path`` after "/mpiwg",
    the requested ``folder_name`` and the image folder name "pageimg";
    appends the folder name to the session ``path`` and stores it in the
    session as ``folder_name``.

    Returns the rendered ``OSAS_upload`` page template.
    """
    request = self.REQUEST
    session = request.SESSION

    self.path = re.search(r"/mpiwg(.*)", session['path']).group(1)
    self.folder_name = request['folder_name']

    # next has to be changed -> error if the back button is used!!
    session['path'] = "/".join([session['path'], request['folder_name']])
    session['folder_name'] = self.folder_name

    self.image_folder_name = "pageimg"
    upload_page = PageTemplateFile('Products/OSA_system/OSAS_upload').__of__(self)
    return upload_page()
123:
def add5(self):
    """Render the ``OSAS_add_metadata`` page template (add index.meta step)."""
    return PageTemplateFile('Products/OSA_system/OSAS_add_metadata').__of__(self)()
128:
def add6(self):
    """Write index.meta for the new document and finish the workflow.

    Parses the request's query string into the additional metadata, adds
    ``archive-path`` and ``folder_name`` (the two halves of the session
    ``path``) and the fixed ``content-type``, renders the ``index_meta``
    template and writes the result to ``index.meta``.  The file is made
    group writable (0664) and the target folder is recursively set to 0775.

    Returns a redirect to ``URL2?path=...`` if the session flag
    ``no_upload`` is set, otherwise the rendered ``OSAS_saved`` template.
    """
    import subprocess
    from xml.sax.saxutils import escape

    request = self.REQUEST
    session = request.SESSION

    metadata = parse_query_string(request['QUERY_STRING'])
    archive_path, folder_name = os.path.split(session['path'])
    metadata['archive-path'] = archive_path
    metadata['folder_name'] = folder_name
    metadata['content-type'] = "scanned document"

    self.reftype = session['reftype']
    session['add_metadata'] = metadata
    self.add_metadata = metadata
    self.metadata = session['metadata']

    # The values were typed in by the user: escape &, < and > so that they
    # cannot break the generated XML.
    self.metadataprint = "".join(
        ["<%s>%s</%s>\n" % (tag, escape(self.metadata[tag]), tag)
         for tag in self.metadata.keys()])

    newtemplate = PageTemplateFile('Products/OSA_system/index_meta').__of__(self)
    newtemplate.content_type = "text/plain"
    renderxml = newtemplate()

    target_folder = archive_path + "/" + folder_name
    if session.has_key('no_upload'):
        metapath = session['path'] + "/index.meta"
    else:
        metapath = target_folder + "/index.meta"

    # The context manager closes the handle even if writing fails.
    with open(metapath, 'w') as metafile:
        metafile.write(renderxml)
    os.chmod(metapath, 0o664)

    # Argument list instead of a shell string: the folder name comes from
    # the request and must not be interpreted by a shell.
    subprocess.call(['chmod', '-R', '0775', target_folder])

    if session.has_key('no_upload'):
        return request.response.redirect(request['URL2'] + "?path=" + session['path'])

    self.viewpath = re.search(r"/mpiwg/online/(.*)", archive_path).group(1)
    newtemplate2 = PageTemplateFile('Products/OSA_system/OSAS_saved').__of__(self)
    newtemplate2.content_type = "text/html"
    request.response.setHeader('Content-Type', 'text/html')
    return newtemplate2()
171:
172:
173: from time import localtime,strftime
174:
def date(self):
    """Return the current local date formatted as DD.MM.YYYY."""
    now = localtime()
    return strftime("%d.%m.%Y", now)
177:
178:
def _firstTagText(dom, tagnames):
    """Return the text of the first usable element among tagnames.

    Every name but the last is tried and skipped on any error; the last
    name is evaluated unguarded so its error reaches the caller.
    """
    for tagname in tagnames[:-1]:
        try:
            return archive.getText(dom.getElementsByTagName(tagname)[0].childNodes)
        except Exception:
            continue
    return archive.getText(dom.getElementsByTagName(tagnames[-1])[0].childNodes)


def addPresentation(self, path):
    """Prepare the session for a new presentation and render the form.

    Reads author (tags author, Author, Editor), title and date (tags year,
    Year, date) from ``path``/index.meta and stores in the session a
    proposed folder name ("NN-presentation"), the ``path`` and a proposed
    <info> XML snippet (``xmlvorschlag``).

    The free folder number is looked up inside ``path``; the previous code
    tested names relative to the current working directory.

    Returns the rendered ``addPresentation`` page template.
    """
    dom = xml.dom.minidom.parse(path + "/index.meta")

    author = _firstTagText(dom, ('author', 'Author', 'Editor'))
    title = _firstTagText(dom, ('title',))
    # Named "year" so that the module level function date() is not shadowed.
    year = _firstTagText(dom, ('year', 'Year', 'date'))

    i = 1
    while os.path.exists(os.path.join(path, "%02d-presentation" % i)):
        i += 1

    session = self.REQUEST.SESSION
    session['presentationname'] = "%02d-presentation" % i
    session['path'] = path
    # NOTE(review): the values are inserted unescaped; whether
    # archive.getText returns raw or already escaped text is not visible
    # here - confirm before adding XML escaping.
    session['xmlvorschlag'] = """<info>
<author>%s</author>
<title>%s</title>
<date>%s</date>
<display>yes</display>
</info>""" % (author, title, year)

    newtemplate = PageTemplateFile('Products/OSA_system/addPresentation').__of__(self)
    return newtemplate()
215:
def addPresentation2(self):
    """Create the presentation folder and write its info.xml.

    Reads ``folder_name``, ``content_description`` and ``xmltext`` from the
    request and ``path`` from the session, writes the XML text to
    ``path/folder_name/info.xml`` (folder 0755, file 0644) and registers
    the folder in index.meta with content type 'presentation'.

    Returns a redirect to ``URL2?path=...``.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']

    # hack: multipart evaluation only works with Mozilla, so an uploaded
    # file is ignored and the text field is always used.  The previous code
    # still read the upload first and then overwrote both results.
    file_name = "info.xml"
    xmlinfo = request['xmltext']

    # NOTE(review): folder_name comes straight from the request and is used
    # as a path component without validation.
    folder = path + "/" + folder_name
    try:
        os.mkdir(folder)
    except OSError:
        # e.g. the folder already exists; a real problem shows up when the
        # file is opened below.
        pass

    target = folder + "/" + file_name
    # The context manager closes the handle even if writing fails.
    with open(target, "w") as infofile:
        infofile.write(xmlinfo)
    os.chmod(folder, 0o755)
    os.chmod(target, 0o644)
    addDirsToIndexMeta(path, folder_name, content_description, 'presentation')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
247:
def addText(self, path):
    """Store ``path`` in the session and render the add fulltext form.

    The session entry ``existing_names`` is set to the fixed list
    ['pageimg'].
    """
    session = self.REQUEST.SESSION
    # to be done: generate list of existing text files
    session['existing_names'] = ['pageimg']
    session['path'] = path
    return PageTemplateFile('Products/OSA_system/addText').__of__(self)()
254:
def addText2(self):
    """Store an uploaded fulltext file in a new folder below the session path.

    Reads ``folder_name``, ``content_description`` and the upload
    ``fileupload`` from the request, creates ``path/folder_name`` (0755),
    writes the uploaded data into it (0644) and registers the folder in
    index.meta with content type 'fulltext'.

    Raises OSError if the folder cannot be created (for example because it
    already exists).  Returns a redirect to ``URL2?path=...``.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']

    # The file name is supplied by the client.  Keep only its last
    # component so that neither "../" nor a full client path (some browsers
    # send "C:\dir\file.xml") becomes part of the target path.
    file_name = os.path.basename(request['fileupload'].filename.replace("\\", "/"))
    filedata = request.form['fileupload'].read()

    folder = path + "/" + folder_name
    os.mkdir(folder)
    target = folder + "/" + file_name
    # The context manager closes the handle even if writing fails.
    with open(target, "w") as textfile:
        textfile.write(filedata)
    os.chmod(folder, 0o755)
    os.chmod(target, 0o644)
    addDirsToIndexMeta(path, folder_name, content_description, 'fulltext')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
271:
def addTextExternal(self, path, texturl, version):
    """Add an external text to the document stored at ``path``.

    Downloads ``texturl``, stores it in a new "fulltextN" folder, registers
    that folder in index.meta (description 'archimedes text:' + version,
    content type 'fulltext'), points the <text> element of the existing
    <texttool> tag at the new file and calls the registration CGI.

    Returns an error message (a string, or a tuple for the "received" case)
    when the text cannot be read or written or no <texttool> tag exists;
    otherwise the lines returned by the registration CGI.
    """
    # read the new text version
    try:
        text = urllib.urlopen(texturl).read()
    except Exception:
        return "ERROR: cannot read: %s" % texturl
    if TextExternalError(text):  # no xml header
        return "ERROR: cannot read: %s" % texturl, "received:", text

    textpath = getNewTextPath(path)  # create a new folder for the text
    name = texturl.split("/")[-1]  # name of the XML file
    target = path + "/" + textpath + "/" + name
    try:
        writefile = open(target, "w")
    except Exception:
        return "ERROR: cannot write: %s" % target
    try:
        writefile.write(text)
    finally:
        writefile.close()
    os.chmod(target, 0o644)

    # Register the new folder in index.meta.  This was an inline copy of
    # addDirsToIndexMeta.
    addDirsToIndexMeta(path, textpath, 'archimedes text:' + version, 'fulltext')

    # change the texttool tag
    dom = xml.dom.minidom.parse(path + "/index.meta")
    node = dom.getElementsByTagName('meta')[0]

    texttools = node.getElementsByTagName('texttool')
    if len(texttools) == 0:
        return "ERROR:no presentation configured yet, user Web Front End to do so!"
    subnode = texttools[0]

    # Remove an existing <text> entry; failures are ignored on purpose.
    try:
        oldtexts = subnode.getElementsByTagName('text')
        if not len(oldtexts) == 0:
            subnode.removeChild(oldtexts[0]).unlink()
    except Exception:
        pass

    # create the new <text> entry
    textfoldernode = dom.createElement('text')
    textfoldernode.appendChild(dom.createTextNode(textpath + "/" + name))
    subnode.appendChild(textfoldernode)

    # Serialise before opening so that a failing toxml() cannot leave an
    # empty index.meta behind; the data is already encoded, hence "wb".
    data = dom.toxml().encode('utf-8')
    with open(path + "/index.meta", "wb") as metafile:
        metafile.write(data)

    # register
    return urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s" % path).readlines()
352:
353:
354:
def TextExternalError(text):
    """Return 1 if ``text`` does not start with an XML declaration, else 0.

    Only the first ten characters are inspected; "<?xml" has to begin
    within them.  The previous version relied on re.search raising an
    exception when nothing matched (it returns None instead) and left the
    '?' unescaped, so it always returned 0.
    """
    firsts = text[0:10]
    try:
        match = re.search(r"<\?xml", firsts)
    except TypeError:
        # not a string at all
        return 1
    if match is None:
        return 1
    return 0
363:
def getNewTextPath(path):
    """Create the first unused "fulltextN" folder below ``path``.

    N starts at 1.  The folder is created with mode 0755 and its name
    (without ``path``) is returned.
    """
    number = 1
    foldername = "fulltext%i" % number
    while os.path.exists(path + "/" + foldername):
        number += 1
        foldername = "fulltext%i" % number

    target = path + "/" + foldername
    os.mkdir(target)
    os.chmod(target, 0o755)
    return foldername
371:
def addImages(self, path):
    """Store ``path`` in the session and render the add images form.

    The session entry ``existing_names`` is set to the fixed list
    ['pageimg'].
    """
    session = self.REQUEST.SESSION
    # to be done: generate list of existing pageimages files
    session['existing_names'] = ['pageimg']
    session['path'] = path
    return PageTemplateFile('Products/OSA_system/OSAS_addImages').__of__(self)()
378:
def addImages2(self):
    """Register a new image folder in index.meta and render the upload page.

    Stores the requested ``folder_name`` and ``content_description`` and
    the content type 'images' on ``self``, adds the folder to the
    index.meta below the session ``path`` and then reduces the session
    ``path`` to the part after "/mpiwg".

    Returns the rendered ``OSAS_upload2`` page template.
    """
    request = self.REQUEST
    session = request.SESSION

    foldername = request['folder_name']
    self.image_folder_name = foldername
    description = request['content_description']
    self.content_description = description
    self.content_type = 'images'

    addDirsToIndexMeta(session['path'], foldername, description, 'images')
    session['path'] = re.search(r"/mpiwg(.*)", session['path']).group(1)

    upload_page = PageTemplateFile('Products/OSA_system/OSAS_upload2').__of__(self)
    return upload_page()
392:
393:
394:
def addDirsToIndexMeta(path, folder_name, content_description, content_type):
    """Append a <dir> entry to the first <resource> of ``path``/index.meta.

    The entry has the children <name>, <description> and <content-type>
    holding the three remaining arguments.  The file is rewritten in place
    as UTF-8.

    Raises IndexError if index.meta has no <resource> element.
    """
    metafile = path + "/index.meta"
    dom = xml.dom.minidom.parse(metafile)
    resource = dom.getElementsByTagName('resource')[0]

    subnode = dom.createElement('dir')
    for tagname, text in (('name', folder_name),
                          ('description', content_description),
                          ('content-type', content_type)):
        child = dom.createElement(tagname)
        child.appendChild(dom.createTextNode(text))
        subnode.appendChild(child)
    resource.appendChild(subnode)

    # Serialise before opening so that a failing toxml() cannot leave an
    # empty index.meta behind; the data is already encoded, hence "wb".
    data = dom.toxml().encode('utf-8')
    with open(metafile, "wb") as writefile:
        writefile.write(data)
422:
def readArchimedesXML(folder):
    """Return the ``xml`` attribute registered for ``folder``.

    Downloads the corpus manifest from the Archimedes server and looks at
    every <item> that has both a ``dir`` and an ``xml`` attribute.  If
    several items name ``folder`` the last one wins.  Returns "" if no item
    matches.
    """
    manifest = urllib.urlopen("http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpusmanifest").read()
    dom = xml.dom.minidom.parseString(manifest)

    found = ""
    for item in dom.getElementsByTagName('item'):
        if not (item.hasAttribute('dir') and item.hasAttribute('xml')):
            continue
        if item.getAttribute('dir') == folder:
            found = item.getAttribute('xml')
    return found
def combineTextImage(self, path):
    """Render the input form for creating the texttool meta tag.

    Sorts the entries of ``path`` into image folders, fulltext folders and
    presentation folders using the ``archive`` helpers, looks up the
    external XML registered for the last component of ``path``, reads the
    first <lang> element of index.meta ("" if there is none) and stores all
    of this together with the ISO language table in the session.

    Returns the rendered ``ImageandText`` page template.
    """
    files = os.listdir(path)

    texts = []
    imagefolders = []
    presentationfolders = []

    externxml = readArchimedesXML(path.split("/")[-1])

    for filename in files:
        if archive.isdigilib2(path + "/" + filename):
            imagefolders.append(filename)
        if archive.isFullText(path, filename):
            texts.append(filename)
        if archive.isPresentation(path, filename):
            presentationfolders.append(filename)

    dom = xml.dom.minidom.parse(path + "/index.meta")
    try:
        filelanguage = archive.getText(dom.getElementsByTagName('lang')[0].childNodes)
    except Exception:
        filelanguage = ""

    # Read the language table once; it was previously read and parsed twice.
    isolist = getISO()

    session = self.REQUEST.SESSION
    session['isolist'] = isolist
    session['isolistsort'] = sorted(isolist.keys())
    session['path'] = path
    session['texts'] = texts
    session['imagefolders'] = imagefolders
    session['presentationfolders'] = presentationfolders
    session['filelanguage'] = filelanguage
    session['externxml'] = externxml

    newtemplate = PageTemplateFile('Products/OSA_system/ImageandText').__of__(self)
    return newtemplate()
483:
484:
485:
def _appendTextElement(dom, parent, tagname, text):
    """Append <tagname>text</tagname> to parent and return the new element."""
    element = dom.createElement(tagname)
    element.appendChild(dom.createTextNode(text))
    parent.appendChild(element)
    return element


def combineTextImage2(self, path):
    """Create or replace the texttool meta tag in ``path``/index.meta.

    An existing <texttool> below the first <meta> is removed and a new one
    is built from the request keys ``image``, ``text``, ``external``,
    ``pagebreak``, ``presentation`` and ``xslt`` (each one optional).  The
    request's ``lang`` is written as <lang> below <bib>, or below
    <resource> when that fails.  Afterwards index.meta is rewritten, the
    registration CGI is called and, if an image folder was given, the
    remote thumbnail scaler is started.

    Changes from the previous version:
    * a debug print of an undefined name raised a (swallowed) NameError, so
      the language was never written below <bib>; the print is gone;
    * the presentation folder is only listed when ``presentation`` was
      submitted, and the scaler is only started when ``image`` was
      submitted - both used to raise KeyError otherwise;
    * the scaler argument is shell quoted.
    """
    request = self.REQUEST

    dom = xml.dom.minidom.parse(path + "/index.meta")
    node = dom.getElementsByTagName('meta')[0]

    # an existing texttool tag is removed first
    subnodelist = node.getElementsByTagName('texttool')
    if not len(subnodelist) == 0:
        node.removeChild(subnodelist[0]).unlink()

    subnode = dom.createElement('texttool')
    _appendTextElement(dom, subnode, 'display', 'yes')

    if request.has_key('image'):
        _appendTextElement(dom, subnode, 'image', request['image'])

    if request.has_key('text'):
        textfile = os.listdir(path + "/" + request['text'])[0]
        _appendTextElement(dom, subnode, 'text',
                           path + "/" + request['text'] + "/" + textfile)

    if request.has_key('external'):  # USE CVS instead of local text
        _appendTextElement(dom, subnode, 'text', request.SESSION['externxml'])

    if request.has_key('pagebreak'):
        _appendTextElement(dom, subnode, 'pagebreak', request['pagebreak'])

    if request.has_key('presentation'):
        presentfile = os.listdir(path + "/" + request['presentation'])[0]
        _appendTextElement(dom, subnode, 'presentation',
                           request['presentation'] + "/" + presentfile)

    if request.has_key('xslt') and not request['xslt'] == "":
        _appendTextElement(dom, subnode, 'xslt', request['xslt'])

    node.appendChild(subnode)

    # Drop old <lang> entries below <bib>; failures are ignored on purpose.
    try:
        bib = node.getElementsByTagName('bib')[0]
        for oldlang in bib.getElementsByTagName('lang'):
            bib.removeChild(oldlang)
    except Exception:
        pass

    try:
        bib = dom.getElementsByTagName('bib')[0]
        _appendTextElement(dom, bib, 'lang', request['lang'])
    except Exception:
        # No <bib> (or no language submitted): fall back to <resource>.
        try:
            oldlangs = dom.getElementsByTagName('lang')
            resource = dom.getElementsByTagName('resource')[0]
            for oldlang in oldlangs:
                resource.removeChild(oldlang)
        except Exception:
            pass

        try:
            resource = dom.getElementsByTagName('resource')[0]
            _appendTextElement(dom, resource, 'lang', request['lang'])
        except Exception:
            pass

    # Serialise before opening so that a failing toxml() cannot leave an
    # empty index.meta behind; the data is already encoded, hence "wb".
    data = dom.toxml().encode('utf-8')
    with open(path + "/index.meta", "wb") as metafile:
        metafile.write(data)

    urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s" % path).readlines()

    if request.has_key('image'):
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote
        imagepath = re.sub('/mpiwg/online/', '', request['path'] + "/" + request['image'])
        # Quoted twice: the command line is parsed by the local shell and
        # again by the remote shell started by ssh.  Names made only of
        # ordinary path characters are passed through unchanged.
        os.popen("ssh nausikaa2.rz-berlin.mpg.de /usr/local/mpiwg/scripts/scaleomat.pl %s /mpiwg/temp/online/scaled/thumb 90 >> /tmp/sc.out &" % quote(quote(imagepath)))
594:
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>