# BAUSTELLE (work in progress)
2:
3:
4: """Methoden zum hinzufügen von Dokumenten ins Archiv"""
5: import archive
6: import os
7: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
8: from Products.PageTemplates.PageTemplate import PageTemplate
9: import string
10: import urllib
11: import xml.dom.minidom
12:
13: #referencetypes=['Book','Bic','Journal Article','Manuscript','jiji']
14:
15: import re
def add(self, no_upload=0):
    """Start the "add document" workflow and render its first page.

    Copies the archive path from the request into the session and records
    in the session whether the workflow runs without a document upload.

    no_upload -- 0 (default): metadata only, the session flag 'no_upload'
                 is set to 'yes'.  Any other value: documents are uploaded
                 as well, and a stale 'no_upload' flag is removed.

    Returns the rendered OSAS_add_new page template.
    """
    # Presumably read by the template to list the selectable reference types.
    self.referencetypes = ['Book', 'Journal Article', 'Manuscript']

    newtemplate = PageTemplateFile('Products/OSA_system/OSAS_add_new').__of__(self)

    session = self.REQUEST.SESSION
    session['path'] = self.REQUEST['path']
    if no_upload == 0:
        session['no_upload'] = 'yes'
    elif session.has_key('no_upload'):
        del session['no_upload']

    # The former second "return self.referencetypes" could never be reached
    # and has been dropped.
    return newtemplate()
32:
33:
34:
def getISO(isofile='/usr/local/mpiwg/Zope/lib/python/Products/OSA_system/iso639-1.inc'):
    """Read the tab-separated ISO 639-1 table and return it as a dictionary.

    Every usable line contributes one entry: the second column is the key
    and the first column is the value.  Lines with fewer than two columns
    are skipped instead of invalidating the whole table.

    isofile -- path of the table; defaults to the installed product file.

    Returns an empty dictionary when the file cannot be opened.
    """
    try:
        handle = open(isofile, 'r')
    except IOError:
        return {}
    try:
        lines = handle.readlines()
    finally:
        # Always release the file handle, even if reading fails.
        handle.close()

    ret = {}
    for lineraw in lines:
        columns = lineraw.split('\t')
        if len(columns) < 2:
            continue
        ret[columns[1]] = columns[0]
    return ret
49:
def add2(self):
    """Remember the chosen reference type and render the bibliographic form.

    Reads 'Reference Type' from the request, stores it on the object and in
    the session, selects the matching list of field names and loads the ISO
    language table.  Raises KeyError for an unknown reference type.

    Returns the rendered OSAS_add_bibdata page template.
    """
    chosen_type = self.REQUEST['Reference Type']
    self.reftype = chosen_type
    self.REQUEST.SESSION['reftype'] = chosen_type

    # Field names offered for each supported reference type.
    self.bibdata = {
        'Book': ['author', 'year', 'title', 'series editor', 'series title',
                 'series volume', 'number of pages', 'city', 'publisher',
                 'edition', 'number of volumes', 'translator', 'ISBN ISSN'],
        'Journal Article': ['author', 'year', 'title', 'journal', 'volume',
                            'issue', 'pages', 'alternate journal',
                            'call number'],
        'Manuscript': ['author', 'year', 'title', 'location', 'signature',
                       'pages', 'editorial remarks', 'description',
                       'keywords'],
    }
    self.fields = self.bibdata[chosen_type]
    self.isolist = getISO()

    return PageTemplateFile('Products/OSA_system/OSAS_add_bibdata').__of__(self)()
67:
68:
def parse_query_string(str):
    """Parse a URL query string into a dictionary of decoded strings.

    The string is split into "key=value" pairs first and each part is
    percent-decoded afterwards, so encoded "&" or "=" characters inside a
    value no longer break the parsing.

    Keys:   percent-decoded, then every "+" is replaced by "-".
    Values: "+" is decoded as a space; an encoded plus ("%2B") stays "+".

    Empty pairs (for example a trailing "&") are ignored, and a pair
    without "=" is stored with an empty value.  An empty query string
    yields an empty dictionary.

    str -- the raw query string.  The name shadows the builtin but is kept
           because it is part of the function's signature.
    """
    # Imported locally so the function runs on Python 2 and Python 3 alike.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote

    queries = {}
    for pair in str.split("&"):
        if not pair:
            continue
        parts = pair.split("=", 1)
        key = unquote(parts[0]).replace("+", "-")
        if len(parts) == 2:
            # Translate "+" before unquoting so that "%2B" survives as "+".
            value = unquote(parts[1].replace("+", " "))
        else:
            value = ""
        queries[key] = value
    return queries
93:
def add3(self):
    """Store the submitted bibliographic data and propose a folder name.

    The query string is parsed into a metadata dictionary that is kept in
    the session.  The proposed name is built from the first five characters
    of the author, the first five characters of the title and the year.

    Without upload (session flag 'no_upload') the current path is used as
    folder name and the workflow continues directly with add5(); otherwise
    the rendered OSAS_add_naming page template is returned.
    """
    session = self.REQUEST.SESSION
    metadata = parse_query_string(self.REQUEST['QUERY_STRING'])
    session['metadata'] = metadata

    proposal = "_".join([metadata['author'][:5],
                         metadata['title'][:5],
                         metadata['year']])
    # Non-ASCII characters are dropped from the proposal.
    self.vorschlag_naming = proposal.decode('ascii', 'ignore')

    if not session.has_key('no_upload'):
        return PageTemplateFile('Products/OSA_system/OSAS_add_naming').__of__(self)()

    session['folder_name'] = session['path']
    return add5(self)
106:
107:
def add4(self):
    """Record the chosen folder name and render the upload page.

    Sets self.path to the part of the session path that follows "/mpiwg",
    appends the requested folder name to the session path and stores the
    folder name on the object and in the session.

    Returns the rendered OSAS_upload page template.
    """
    request = self.REQUEST
    session = request.SESSION

    self.path = re.search(r"/mpiwg(.*)", session['path']).group(1)
    self.folder_name = request['folder_name']

    # Known problem: the folder name is appended on every call, so going
    # back in the browser and submitting again extends the path a second
    # time.
    session['path'] = session['path'] + "/" + request['folder_name']

    session['folder_name'] = self.folder_name
    self.image_folder_name = "pageimg"
    return PageTemplateFile('Products/OSA_system/OSAS_upload').__of__(self)()
120:
def add5(self):
    """Render the OSAS_add_metadata page template (index.meta input step)."""
    return PageTemplateFile('Products/OSA_system/OSAS_add_metadata').__of__(self)()
125:
def add6(self):
    """Render index.meta from the collected metadata and write it to disk.

    The additional metadata comes from the query string and is extended by
    'archive-path', 'folder_name' (both derived from the session path) and
    'content-type'.  The bibliographic data stored in the session earlier
    is serialised into self.metadataprint as one XML element per entry.

    Without upload (session flag 'no_upload') the browser is redirected to
    URL2 with the path as query parameter; otherwise the rendered
    OSAS_saved page template is returned.
    """
    from xml.sax.saxutils import escape

    request = self.REQUEST
    session = request.SESSION
    no_upload = session.has_key('no_upload')

    metadata = parse_query_string(request['QUERY_STRING'])
    archive_path, folder_name = os.path.split(session['path'])
    metadata['archive-path'] = archive_path
    metadata['folder_name'] = folder_name
    metadata['content-type'] = "scanned document"
    self.reftype = session['reftype']
    session['add_metadata'] = metadata
    self.add_metadata = metadata
    self.metadata = session['metadata']

    # The values are user input; escape them so that "&", "<" or ">" in a
    # title cannot produce a malformed index.meta.  (Presumably the
    # index_meta template inserts this string as markup.)
    self.metadataprint = "".join(
        ["<%s>%s</%s>\n" % (tag, escape(value), tag)
         for tag, value in self.metadata.items()])

    newtemplate = PageTemplateFile('Products/OSA_system/index_meta').__of__(self)
    newtemplate.content_type = "text/plain"
    renderxml = newtemplate()

    if no_upload:
        metapath = session['path'] + "/index.meta"
    else:
        metapath = archive_path + "/" + folder_name + "/index.meta"

    f = open(metapath, 'w')
    try:
        f.write(renderxml)
    finally:
        # Close the file even when writing fails.
        f.close()
    os.chmod(metapath, 0o644)

    if no_upload:
        return request.response.redirect(request['URL2'] + "?path=" + session['path'])

    self.viewpath = re.search(r"/mpiwg/online/(.*)", archive_path).group(1)
    newtemplate2 = PageTemplateFile('Products/OSA_system/OSAS_saved').__of__(self)
    newtemplate2.content_type = "text/html"
    request.response.setHeader('Content-Type', 'text/html')
    return newtemplate2()
167:
168:
169: from time import localtime,strftime
170:
def date(self):
    """Return the current local date formatted as DD.MM.YYYY."""
    # Without an explicit time tuple strftime() formats the current local time.
    return strftime("%d.%m.%Y")
173:
174:
def _first_tag_text(dom, tag_names):
    """Return the text of the first element found for any of tag_names, tried in order."""
    for tag_name in tag_names:
        elements = dom.getElementsByTagName(tag_name)
        if elements:
            return archive.getText(elements[0].childNodes)
    raise IndexError("none of these tags found: %s" % ", ".join(tag_names))


def addPresentation(self,path):
    """Prepare the "add presentation" form for the resource at path.

    Reads author, title and date from path/index.meta, proposes the first
    unused "NN-presentation" folder name below path and stores the
    proposal, the path and a pre-filled <info> XML snippet in the session.

    Raises IndexError when index.meta has no author/Author/Editor, no
    title, or no year/Year/date element.

    Returns the rendered addPresentation page template.
    """
    from xml.sax.saxutils import escape

    dom = xml.dom.minidom.parse(path+"/index.meta")

    author = _first_tag_text(dom, ('author', 'Author', 'Editor'))
    title = _first_tag_text(dom, ('title',))
    year = _first_tag_text(dom, ('year', 'Year', 'date'))

    # Look for existing presentation folders below `path`; the name alone
    # would be resolved against the current working directory of the
    # process and never see the folders of this resource.
    number = 1
    while os.path.exists(path + "/" + "%02d-presentation" % number):
        number += 1

    session = self.REQUEST.SESSION
    session['presentationname'] = "%02d-presentation" % number
    session['path'] = path
    # Escape the values so that the proposed snippet stays well-formed XML.
    session['xmlvorschlag'] = """<info>
    <author>%s</author>
    <title>%s</title>
    <date>%s</date>
    <display>yes</display>
</info>""" % (escape(author), escape(title), escape(year))

    newtemplate = PageTemplateFile('Products/OSA_system/addPresentation').__of__(self)
    return newtemplate()
211:
def addPresentation2(self):
    """Create the presentation folder, store info.xml and register the folder.

    Reads 'folder_name', 'content_description' and 'xmltext' from the
    request and the resource path from the session, writes the XML text to
    <path>/<folder_name>/info.xml, adds a 'presentation' <dir> entry to
    index.meta and redirects to URL2 with the path as query parameter.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']

    # The XML is always taken from the 'xmltext' form field and stored as
    # info.xml.  The former handling of an uploaded file was overridden by
    # exactly these two values ("multipart evaluation only works with
    # Mozilla"), so it has been removed.
    file_name = "info.xml"
    xmlinfo = request['xmltext']

    folder = path + "/" + folder_name
    target = folder + "/" + file_name
    try:
        os.mkdir(folder)
    except OSError:
        # Best effort: an existing folder is reused; any other problem
        # surfaces when the file is opened below.
        pass

    f = open(target, "w")
    try:
        f.write(xmlinfo)
    finally:
        f.close()
    os.chmod(folder, 0o755)
    os.chmod(target, 0o644)
    addDirsToIndexMeta(path, folder_name, content_description, 'presentation')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
243:
def addText(self,path):
    """Prepare the "add full text" form for the resource at path.

    Stores the path and the list of already used folder names in the
    session and returns the rendered addText page template.
    """
    session = self.REQUEST.SESSION
    # TODO: generate the list of existing text folders instead of this fixed value.
    session['existing_names'] = ['pageimg']
    session['path'] = path
    return PageTemplateFile('Products/OSA_system/addText').__of__(self)()
250:
def addText2(self):
    """Store an uploaded full-text file in a new folder and register the folder.

    Reads 'folder_name', 'content_description' and the upload 'fileupload'
    from the request and the resource path from the session.  The folder
    is created (os.mkdir raises OSError if it already exists), the upload
    is written into it, a 'fulltext' <dir> entry is added to index.meta
    and the browser is redirected to URL2 with the path as query parameter.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']

    # The filename is supplied by the client.  Some browsers send the full
    # client-side path, and a crafted name could point outside the target
    # folder, so only the last path component is used.
    file_name = re.split(r"[\\/]", request['fileupload'].filename)[-1]
    filedata = request.form['fileupload'].read()

    folder = path + "/" + folder_name
    target = folder + "/" + file_name
    os.mkdir(folder)
    f = open(target, "w")
    try:
        f.write(filedata)
    finally:
        # Close the file even when writing fails.
        f.close()
    os.chmod(folder, 0o755)
    os.chmod(target, 0o644)
    addDirsToIndexMeta(path, folder_name, content_description, 'fulltext')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
267:
def addTextExternal(self,path,texturl,version):
    """Add an external text to the resource at path.

    Downloads texturl, stores it in a newly created "fulltext<N>" folder
    below path, adds a 'fulltext' <dir> entry to index.meta, points the
    <text> element of the existing <texttool> entry at the new file and
    registers the resource with the remote registration CGI.

    path    -- folder of the resource (contains index.meta)
    texturl -- URL of the XML text; its last path component is the file name
    version -- appended to the description "archimedes text:"

    Returns the lines answered by the registration CGI, or a string
    starting with "ERROR" when the text cannot be read or written or when
    index.meta has no <texttool> entry yet.  In the last case the text file
    and the <dir> entry have already been written.
    """
    try:  # read the new text version
        text = urllib.urlopen(texturl).read()
    except Exception:  # any failure while fetching is reported to the caller
        return "ERROR: cannot read: %s"%texturl
    if TextExternalError(text):  # no XML header
        # Formerly a tuple was returned here; all other error paths return
        # a plain string.
        return "ERROR: cannot read: %s received: %s" % (texturl, text)

    textpath = getNewTextPath(path)  # creates a new folder for the text
    name = texturl.split("/")[-1]    # name of the XML file
    target = path + "/" + textpath + "/" + name
    try:
        writefile = open(target, "w")
    except IOError:
        return "ERROR: cannot write: %s" % target
    try:
        writefile.write(text)
    finally:
        writefile.close()
    os.chmod(target, 0o644)

    # Register the new folder in index.meta (same <dir> entry as before,
    # now produced by the shared helper).
    addDirsToIndexMeta(path, textpath, 'archimedes text:'+version, 'fulltext')

    # Change the texttool entry.
    dom = xml.dom.minidom.parse(path+"/index.meta")
    meta = dom.getElementsByTagName('meta')[0]

    texttools = meta.getElementsByTagName('texttool')
    if not texttools:
        return "ERROR:no presentation configured yet, user Web Front End to do so!"
    texttool = texttools[0]

    # Remove the first existing <text> element if it is a direct child of
    # <texttool>; elements nested deeper are left alone, as before.
    old_texts = texttool.getElementsByTagName('text')
    if old_texts and old_texts[0].parentNode is texttool:
        texttool.removeChild(old_texts[0]).unlink()

    # Create the new <text> element.
    textnode = dom.createElement('text')
    textnode.appendChild(dom.createTextNode(textpath+"/"+name))
    texttool.appendChild(textnode)

    # Write index.meta; serialise first so that a failure cannot leave a
    # truncated file behind.
    data = dom.toxml().encode('utf-8')
    writefile = open(path+"/index.meta", "wb")
    try:
        writefile.write(data)
    finally:
        writefile.close()

    # Register the resource.
    return urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines()
348:
349:
350:
def TextExternalError(text):
    """Return 1 if text does not look like an XML document, otherwise 0.

    The text is accepted when "<?xml" occurs within its first ten
    characters.  The previous implementation relied on re.search() raising
    an exception for a non-match, which it never does, so every text was
    accepted.
    """
    if "<?xml" in text[0:10]:
        return 0
    return 1
359:
def getNewTextPath(path):
    """Create the first unused "fulltext<N>" folder below path and return its name.

    N starts at 1.  The folder is created with mode 0755; only the folder
    name, not the full path, is returned.
    """
    number = 1
    folder_name = "fulltext%i" % number
    while os.path.exists(path + "/" + folder_name):
        number += 1
        folder_name = "fulltext%i" % number

    new_folder = path + "/" + folder_name
    os.mkdir(new_folder)
    os.chmod(new_folder, 0o755)
    return folder_name
367:
def addImages(self,path):
    """Prepare the "add image folder" form for the resource at path.

    Stores the path and the list of already used folder names in the
    session and returns the rendered OSAS_addImages page template.
    """
    session = self.REQUEST.SESSION
    # TODO: generate the list of existing page image folders instead of this fixed value.
    session['existing_names'] = ['pageimg']
    session['path'] = path
    return PageTemplateFile('Products/OSA_system/OSAS_addImages').__of__(self)()
374:
def addImages2(self):
    """Register a new image folder in index.meta and render the upload page.

    Reads 'folder_name' and 'content_description' from the request, adds an
    'images' <dir> entry to the index.meta of the session path and then
    reduces the session path to the part that follows "/mpiwg".

    Returns the rendered OSAS_upload2 page template.
    """
    request = self.REQUEST
    session = request.SESSION

    self.image_folder_name = request['folder_name']
    self.content_description = request['content_description']
    self.content_type = 'images'

    addDirsToIndexMeta(session['path'],
                       self.image_folder_name,
                       self.content_description,
                       self.content_type)

    session['path'] = re.search(r"/mpiwg(.*)", session['path']).group(1)
    return PageTemplateFile('Products/OSA_system/OSAS_upload2').__of__(self)()
388:
389:
390:
def addDirsToIndexMeta(path,folder_name,content_description,content_type):
    """Append a <dir> entry to the first <resource> element of path/index.meta.

    The entry has the children <name>, <description> and <content-type>,
    filled with folder_name, content_description and content_type.  The
    file is rewritten in place, encoded as UTF-8.

    Raises IndexError when index.meta contains no <resource> element.
    """
    index_meta = path + "/index.meta"
    dom = xml.dom.minidom.parse(index_meta)
    resource = dom.getElementsByTagName('resource')[0]

    dir_node = dom.createElement('dir')
    for tag_name, text in (('name', folder_name),
                           ('description', content_description),
                           ('content-type', content_type)):
        child = dom.createElement(tag_name)
        child.appendChild(dom.createTextNode(text))
        dir_node.appendChild(child)
    resource.appendChild(dir_node)

    # Serialise before the file is opened for writing: opening truncates
    # it, so a failure during serialisation would otherwise destroy the
    # existing index.meta.
    data = dom.toxml().encode('utf-8')
    writefile = open(index_meta, "wb")
    try:
        writefile.write(data)
    finally:
        writefile.close()
418:
def readArchimedesXML(folder):
    """Return the 'xml' value listed for folder in the Archimedes corpus manifest.

    The manifest is fetched from the Archimedes server on every call.  Each
    <item> element that carries both a 'dir' and an 'xml' attribute is
    taken into account; items lacking either attribute are skipped.

    Returns an empty string when folder is not listed.
    """
    manifest = urllib.urlopen("http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpusmanifest").read()
    dom = xml.dom.minidom.parseString(manifest)

    xml_by_dir = {}
    for item in dom.getElementsByTagName('item'):
        if item.hasAttribute('dir') and item.hasAttribute('xml'):
            xml_by_dir[item.getAttribute('dir')] = item.getAttribute('xml')

    return xml_by_dir.get(folder, "")
def combineTextImage(self,path):
    """Render the input form used to create the texttool meta tag.

    Classifies the entries of path into image folders, full texts and
    presentation folders with the helpers of the archive module, looks up
    the external Archimedes XML for the last component of path and reads
    the language from index.meta (empty string if it cannot be read).
    Everything is stored in the session for the template.

    Returns the rendered ImageandText page template.
    """
    entries = os.listdir(path)
    externxml = readArchimedesXML(path.split("/")[-1])

    imagefolders = []
    texts = []
    presentationfolders = []
    for entry in entries:
        # One entry may fall into several categories.
        if archive.isdigilib2(path + "/" + entry):
            imagefolders.append(entry)
        if archive.isFullText(path, entry):
            texts.append(entry)
        if archive.isPresentation(path, entry):
            presentationfolders.append(entry)

    dom = xml.dom.minidom.parse(path + "/index.meta")
    try:
        filelanguage = archive.getText(dom.getElementsByTagName('lang')[0].childNodes)
    except:
        # Deliberately broad, as before: any problem means "no language known".
        filelanguage = ""

    session = self.REQUEST.SESSION
    for key, value in (('isolist', getISO()),
                       ('path', path),
                       ('texts', texts),
                       ('imagefolders', imagefolders),
                       ('presentationfolders', presentationfolders),
                       ('filelanguage', filelanguage),
                       ('externxml', externxml)):
        session[key] = value

    return PageTemplateFile('Products/OSA_system/ImageandText').__of__(self)()
476:
477:
478:
def _append_text_element(dom, parent, tag_name, text):
    """Append <tag_name>text</tag_name> to parent and return the new element."""
    element = dom.createElement(tag_name)
    element.appendChild(dom.createTextNode(text))
    parent.appendChild(element)
    return element


def combineTextImage2(self,path):
    """Create or replace the texttool meta tag in path/index.meta.

    An existing <texttool> element below <meta> is removed and a new one is
    built from the request: <display> is always "yes"; <image>, <text>
    (local text and/or external XML from the session), <pagebreak>,
    <presentation> and a non-empty <xslt> are added when the matching
    request key is present.

    The language entry is replaced as well: existing <lang> children of
    <bib> and <resource> are removed and, if the request carries 'lang',
    the new entry is added to <bib> when it exists, otherwise to
    <resource>.

    Afterwards index.meta is written, the resource is registered with the
    remote registration CGI and, if an image folder was given, thumbnail
    scaling is started in the background via ssh.
    """
    # quote() lives in shlex on Python 3 and in pipes on Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    request = self.REQUEST
    index_meta = path + "/index.meta"
    dom = xml.dom.minidom.parse(index_meta)
    meta = dom.getElementsByTagName('meta')[0]

    # An existing texttool tag is deleted and created anew.
    existing = meta.getElementsByTagName('texttool')
    if existing:
        meta.removeChild(existing[0]).unlink()

    texttool = dom.createElement('texttool')
    _append_text_element(dom, texttool, 'display', 'yes')

    if request.has_key('image'):
        _append_text_element(dom, texttool, 'image', request['image'])

    if request.has_key('text'):
        textfile = os.listdir(path+"/"+request['text'])[0]
        # NOTE(review): this stores an absolute path, whereas
        # addTextExternal stores a path relative to the resource - confirm
        # which form the text tool expects.
        _append_text_element(dom, texttool, 'text',
                             path+"/"+request['text']+"/"+textfile)

    if request.has_key('external'):  # use the external XML instead of a local text
        _append_text_element(dom, texttool, 'text', request.SESSION['externxml'])

    if request.has_key('pagebreak'):
        _append_text_element(dom, texttool, 'pagebreak', request['pagebreak'])

    if request.has_key('presentation'):
        # The folder is listed only when a presentation was actually
        # selected; formerly a missing key raised KeyError up front.
        presentfile = os.listdir(path+"/"+request['presentation'])[0]
        _append_text_element(dom, texttool, 'presentation',
                             request['presentation']+"/"+presentfile)

    if request.has_key('xslt') and request['xslt'] != "":
        _append_text_element(dom, texttool, 'xslt', request['xslt'])

    meta.appendChild(texttool)

    # Language entry.  The former code hit a NameError in a debug print
    # before the new <lang> could be attached to <bib>, so the entry always
    # ended up below <resource>.  Stale entries are removed from both
    # places to avoid duplicates.
    lang_parents = [nodes[0] for nodes in (dom.getElementsByTagName('bib'),
                                           dom.getElementsByTagName('resource'))
                    if nodes]
    for parent in lang_parents:
        stale = [child for child in parent.childNodes
                 if child.nodeType == child.ELEMENT_NODE and child.tagName == 'lang']
        for child in stale:
            parent.removeChild(child).unlink()
    if lang_parents and request.has_key('lang'):
        _append_text_element(dom, lang_parents[0], 'lang', request['lang'])

    # Serialise before opening the file so that a failure cannot leave a
    # truncated index.meta behind.
    data = dom.toxml().encode('utf-8')
    writefile = open(index_meta, "wb")
    try:
        writefile.write(data)
    finally:
        writefile.close()

    # Register the resource.
    urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines()

    # Start thumbnail scaling in the background.  The image path comes from
    # the request, so it is quoted for the remote shell and the complete
    # remote command is quoted for the local shell.
    if request.has_key('image'):
        image_path = re.sub('/mpiwg/online/', '', request['path']+"/"+request['image'])
        remote_command = "/usr/local/mpiwg/scripts/scaleomat.pl %s /mpiwg/temp/online/scaled/thumb 90" % quote(image_path)
        os.popen("ssh nausikaa2.rz-berlin.mpg.de %s >> /tmp/sc.out &" % quote(remote_command))
587:
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>