1: #BAUSTELLE
2:
3:
4: """Methoden zum hinzufügen von Dokumenten ins Archiv"""
5: import archive
6: import os
7: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
8: from Products.PageTemplates.PageTemplate import PageTemplate
9: import string
10: import urllib
11: import xml.dom.minidom
12:
13: #referencetypes=['Book','Bic','Journal Article','Manuscript','jiji']
14:
15: import re
def add(self, no_upload=0):
    """Start the "add to repository" workflow and render its first page.

    Publishes the selectable reference types on ``self``, copies the
    ``path`` request variable into the session and maintains the
    ``no_upload`` session flag.

    no_upload -- 0 (default): only metadata is added, the session flag
        ``no_upload`` is set to 'yes'.  Any other value: documents are
        uploaded as well, the flag is removed from the session if present.

    Returns the result of calling the ``OSAS_add_new`` page template.
    """
    self.referencetypes = ['Book', 'Journal Article', 'Manuscript']

    newtemplate = PageTemplateFile('Products/OSA_system/OSAS_add_new').__of__(self)
    session = self.REQUEST.SESSION
    session['path'] = self.REQUEST['path']
    if no_upload == 0:
        session['no_upload'] = 'yes'
    elif session.has_key('no_upload'):
        del session['no_upload']

    # The original had a second, unreachable "return self.referencetypes"
    # after this statement; it has been removed.
    return newtemplate()
32:
33:
34:
def getISO():
    """Read the tab-separated ISO 639-1 table of the OSA_system product.

    Every line is split at tab characters; the second column becomes the
    dictionary key and the first column the value.  Nothing is stripped,
    so a key taken from the last column of a line keeps its line ending.
    Lines with fewer than two columns (e.g. blank lines) are skipped.

    Returns a dict mapping second-column text to first-column text.
    """
    isofile = open('/usr/local/mpiwg/Zope/lib/python/Products/OSA_system/iso639-1.inc', 'r')
    try:
        rawlines = isofile.readlines()
    finally:
        # always release the handle, even if reading fails
        isofile.close()

    ret = {}
    for lineraw in rawlines:
        # NOTE(review): on a Python 2 byte string this call only succeeds
        # for pure-ASCII lines -- confirm the data file is ASCII.
        line = lineraw.encode('ascii', 'replace')
        columns = line.split('\t')
        if len(columns) < 2:
            continue
        ret[columns[1]] = columns[0]
    return ret
45:
def add2(self):
    """Render the bibliographic data form for the chosen reference type.

    Reads ``Reference Type`` from the request, stores it on ``self`` and in
    the session, and publishes the field list for that type
    (``self.fields``) and the ISO language table (``self.isolist``) on
    ``self`` before calling the ``OSAS_add_bibdata`` page template.

    Raises KeyError when the reference type is not one of the known types.
    """
    chosen_type = self.REQUEST['Reference Type']
    self.reftype = chosen_type
    self.REQUEST.SESSION['reftype'] = chosen_type

    book_fields = ['author','year','title','series editor','series title','series volume','number of pages','city','publisher','edition','number of volumes','translator','ISBN ISSN']
    article_fields = ['author','year','title','journal','volume','issue','pages','alternate journal','call number']
    manuscript_fields = ['author','year','title','location','signature','pages','editorial remarks','description','keywords']
    self.bibdata = {
        'Book': book_fields,
        'Journal Article': article_fields,
        'Manuscript': manuscript_fields,
    }

    self.fields = self.bibdata[chosen_type]
    self.isolist = getISO()

    page = PageTemplateFile('Products/OSA_system/OSAS_add_bibdata')
    return page.__of__(self)()
63:
64:
def parse_query_string(str):
    """Split a URL query string into a dict of decoded names and values.

    The string is cut into fields at '&' and each field into name and
    value at its first '='.  A '+' becomes '-' in names and ' ' in values
    (the names are later used as XML tag names by add6).  Percent escapes
    are decoded per part *after* splitting, so an encoded '&', '=' or '+'
    inside a name or value is preserved instead of being mistaken for a
    separator.

    Empty fields (empty input, trailing or doubled '&') are skipped, a
    field without '=' gets an empty value, and a repeated name keeps the
    value of its last occurrence.

    str -- the raw, still percent-encoded query string.

    Returns a dict mapping decoded names to decoded values.
    """
    # Local import so the function works with either urllib layout.
    try:
        from urllib import unquote  # Python 2
    except ImportError:
        from urllib.parse import unquote  # Python 3

    queries = {}
    for field in str.split("&"):
        if not field:
            continue
        if "=" in field:
            raw_key, raw_value = field.split("=", 1)
        else:
            raw_key, raw_value = field, ""
        key = unquote(raw_key.replace("+", "-"))
        value = unquote(raw_value.replace("+", " "))
        queries[key] = value
    return queries
89:
def add3(self):
    """Store the submitted metadata and continue with the folder name step.

    Parses the request's query string into a metadata dict, keeps it in
    the session and derives a folder name suggestion from the first five
    characters of author and title plus the year
    (``self.vorschlag_naming``).

    Without the ``no_upload`` session flag the ``OSAS_add_naming`` page is
    rendered.  With the flag the session path is used as folder name and
    the result of add5 is returned directly.
    """
    metadata = parse_query_string(self.REQUEST['QUERY_STRING'])
    session = self.REQUEST.SESSION
    session['metadata'] = metadata

    suggestion = "_".join([metadata['author'][:5], metadata['title'][:5], metadata['year']])
    # drop every non-ASCII character from the suggestion
    self.vorschlag_naming = suggestion.decode('ascii','ignore')

    if not session.has_key('no_upload'):
        naming_page = PageTemplateFile('Products/OSA_system/OSAS_add_naming').__of__(self)
        return naming_page()

    session['folder_name'] = session['path']
    return add5(self)
102:
103:
def add4(self):
    """Remember the chosen folder name and render the upload page.

    ``self.path`` is set to the part of the session path that follows
    "/mpiwg".  The folder name from the request is appended to the session
    path and stored in the session; the image folder name is fixed to
    "pageimg".

    Raises AttributeError when the session path does not contain "/mpiwg".
    """
    session = self.REQUEST.SESSION
    base_path = session['path']
    self.path = re.search(r"/mpiwg(.*)", base_path).group(1)

    chosen_folder = self.REQUEST['folder_name']
    self.folder_name = chosen_folder

    # NOTE: needs rework -- the folder name is appended to the session path
    # on every call, so coming back here via the browser's back button
    # appends it again.
    session['path'] = base_path + "/" + chosen_folder
    session['folder_name'] = self.folder_name

    self.image_folder_name = "pageimg"
    upload_page = PageTemplateFile('Products/OSA_system/OSAS_upload').__of__(self)
    return upload_page()
116:
def add5(self):
    """Render the ``OSAS_add_metadata`` page (the index.meta step)."""
    metadata_page = PageTemplateFile('Products/OSA_system/OSAS_add_metadata')
    return metadata_page.__of__(self)()
121:
def add6(self):
    """Render index.meta, write it to disk and show the follow-up page.

    The query string supplies the additional metadata; archive path and
    folder name are derived from the session path.  The bibliographic
    metadata stored in the session by add3 is turned into one XML element
    per entry (``self.metadataprint``), with the values XML-escaped so that
    characters such as '&' or '<' cannot break the generated file.  The
    ``index_meta`` page template is rendered and its output written to
    ``index.meta`` (mode 0644).

    With the ``no_upload`` session flag set the response is redirected to
    ``URL2`` with the session path as ``path`` parameter; otherwise the
    ``OSAS_saved`` page is rendered as text/html.
    """
    from xml.sax.saxutils import escape

    request = self.REQUEST
    metadata = parse_query_string(request['QUERY_STRING'])
    session = request.SESSION

    archive_path, folder_name = os.path.split(session['path'])
    metadata['archive-path'] = archive_path
    metadata['folder_name'] = folder_name
    metadata['content-type'] = "scanned document"

    self.reftype = session['reftype']
    session['add_metadata'] = metadata
    self.add_metadata = metadata
    self.metadata = session['metadata']

    # One element per bibliographic entry; only the value is escaped, the
    # key is used as tag name unchanged.
    elements = []
    for tag in self.metadata.keys():
        elements.append("<" + tag + ">" + escape(self.metadata[tag]) + "</" + tag + ">\n")
    self.metadataprint = "".join(elements)

    newtemplate = PageTemplateFile('Products/OSA_system/index_meta').__of__(self)
    newtemplate.content_type = "text/plain"
    renderxml = newtemplate()

    no_upload = session.has_key('no_upload')
    if no_upload:
        metapath = session['path'] + "/index.meta"
    else:
        metapath = archive_path + "/" + folder_name + "/index.meta"

    f = open(metapath, 'w')
    try:
        f.write(renderxml)
    finally:
        # close the handle even when writing fails
        f.close()
    os.chmod(metapath, 0o644)

    if no_upload:
        return request.response.redirect(request['URL2'] + "?path=" + session['path'])

    # path below /mpiwg/online/ (presumably used by the OSAS_saved page)
    self.viewpath = re.search(r"/mpiwg/online/(.*)", archive_path).group(1)
    newtemplate2 = PageTemplateFile('Products/OSA_system/OSAS_saved').__of__(self)
    newtemplate2.content_type = "text/html"
    request.response.setHeader('Content-Type', 'text/html')
    return newtemplate2()
163:
164:
165: from time import localtime,strftime
166:
def date(self):
    """Return the current local date formatted as DD.MM.YYYY."""
    now = localtime()
    return strftime("%d.%m.%Y", now)
169:
170:
def _first_tag_text(dom, tag_names):
    """Return the text of the first element matching one of tag_names.

    The names are tried in the given order.  Raises IndexError when the
    document contains none of them.
    """
    for tag_name in tag_names:
        elements = dom.getElementsByTagName(tag_name)
        if len(elements) > 0:
            return archive.getText(elements[0].childNodes)
    raise IndexError("no element named %s found" % " / ".join(tag_names))


def addPresentation(self, path):
    """Prepare the "add presentation" form for the document at path.

    Reads author, title and date from ``path/index.meta`` (accepting the
    tag spellings author/Author and year/Year/date), picks the first
    unused folder name of the form ``NN-presentation`` inside ``path`` and
    stores that name, the path and a suggested info XML in the session.

    path -- directory containing index.meta.

    Returns the result of calling the ``addPresentation`` page template.
    Raises IndexError when index.meta lacks author, title or date.
    """
    from xml.sax.saxutils import escape

    dom = xml.dom.minidom.parse(path + "/index.meta")

    author = _first_tag_text(dom, ('author', 'Author'))
    title = _first_tag_text(dom, ('title',))
    year = _first_tag_text(dom, ('year', 'Year', 'date'))

    # Look for a free name inside the document folder; the original tested
    # the bare name, i.e. relative to the process working directory.
    i = 1
    while os.path.exists(os.path.join(path, "%02d-presentation" % i)):
        i += 1

    session = self.REQUEST.SESSION
    session['presentationname'] = "%02d-presentation" % i
    session['path'] = path
    # Values are escaped so that e.g. '&' in a title keeps the XML valid.
    # NOTE(review): assumes archive.getText returns a string -- confirm.
    session['xmlvorschlag'] = """<info>
<author>%s</author>
<title>%s</title>
<date>%s</date>
<display>yes</display>
</info>""" % (escape(author), escape(title), escape(year))

    newtemplate = PageTemplateFile('Products/OSA_system/addPresentation').__of__(self)
    return newtemplate()
203:
def addPresentation2(self):
    """Create the presentation folder and store its info.xml.

    Folder name and description come from the request, the document path
    from the session.  The XML is always taken from the ``xmltext`` request
    field and written to ``info.xml``.  An uploaded file is ignored: the
    original marked multipart evaluation as working only in Mozilla and
    overwrote the uploaded data with ``xmltext`` anyway.  The folder is
    registered in index.meta with content type 'presentation'.

    Returns the redirect to ``URL2`` with the document path as ``path``.
    Raises OSError when the folder can neither be created nor exists.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']

    file_name = "info.xml"
    xmlinfo = request['xmltext']

    target_dir = path + "/" + folder_name
    target_file = target_dir + "/" + file_name
    try:
        os.mkdir(target_dir)
    except OSError:
        # An already existing folder is fine; everything else is a real
        # error and must not be swallowed.
        if not os.path.isdir(target_dir):
            raise

    f = open(target_file, "w")
    try:
        f.write(xmlinfo)
    finally:
        f.close()
    os.chmod(target_dir, 0o755)
    os.chmod(target_file, 0o644)
    addDirsToIndexMeta(path, folder_name, content_description, 'presentation')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
235:
def addText(self, path):
    """Prepare the "add full text" form for the document at path.

    Stores the path and the list of names already in use in the session
    and returns the result of calling the ``addText`` page template.
    """
    session = self.REQUEST.SESSION
    # TODO: generate the list of existing text files instead of this
    # fixed value
    session['existing_names'] = ['pageimg']
    session['path'] = path
    text_page = PageTemplateFile('Products/OSA_system/addText')
    return text_page.__of__(self)()
242:
def addText2(self):
    """Store an uploaded full-text file in a new folder of the document.

    Folder name and description come from the request, the document path
    from the session.  The folder is created (mode 0755), the upload is
    written into it under its original file name (mode 0644) and the
    folder is registered in index.meta with content type 'fulltext'.

    Returns the redirect to ``URL2`` with the document path as ``path``.
    """
    request = self.REQUEST
    folder_name = request['folder_name']
    content_description = request['content_description']
    path = request.SESSION['path']
    upload_name = request['fileupload'].filename
    filedata = request.form['fileupload'].read()

    text_dir = path + "/" + folder_name
    text_file = text_dir + "/" + upload_name

    os.mkdir(text_dir)
    out = open(text_file, "w")
    out.write(filedata)
    out.close()
    os.chmod(text_dir, 0o755)
    os.chmod(text_file, 0o644)

    addDirsToIndexMeta(path, folder_name, content_description, 'fulltext')

    return request.RESPONSE.redirect(request['URL2'] + '?path=' + path)
259:
def addTextExternal(self, path, texturl, version):
    """Add an external text to the document at path.

    Downloads ``texturl``, stores it in a newly created ``fulltextN``
    folder below ``path``, registers that folder in index.meta (content
    type 'fulltext', description 'archimedes text:' + version), points the
    ``text`` entry of the existing ``texttool`` tag to the new file and
    finally calls the registration CGI.

    path     -- document directory containing index.meta.
    texturl  -- URL of the XML text; its last path component is used as
                file name.
    version  -- version label appended to the folder description.

    Returns the lines answered by the registration CGI on success.  On
    failure an "ERROR:..." string is returned, except for the missing XML
    header case, which returns a tuple (kept unchanged for compatibility).
    """
    try:  # read the new text version
        text = "".join(urllib.urlopen(texturl).readlines())
    except Exception:  # error while reading the text
        return "ERROR: cannot read: %s" % texturl
    if TextExternalError(text):  # no XML header
        return "ERROR: cannot read: %s" % texturl, "received:", text

    textpath = getNewTextPath(path)  # create a new folder for the text
    name = texturl.split("/")[-1]  # name of the XML file
    target = path + "/" + textpath + "/" + name
    try:
        writefile = open(target, "w")
    except (IOError, OSError):
        return "ERROR: cannot write: %s" % target
    try:
        writefile.write(text)
    finally:
        writefile.close()
    os.chmod(target, 0o644)

    # Register the new folder in index.meta (same <dir> entry the original
    # built by hand).
    addDirsToIndexMeta(path, textpath, 'archimedes text:' + version, 'fulltext')

    # Change the texttool tag.
    dom = xml.dom.minidom.parse(path + "/index.meta")
    node = dom.getElementsByTagName('meta')[0]

    texttools = node.getElementsByTagName('texttool')
    if len(texttools) == 0:
        # NOTE: the new folder and its <dir> entry already exist here.
        return "ERROR:no presentation configured yet, user Web Front End to do so!"
    subnode = texttools[0]

    # Drop the first existing <text> entry; as before, it is only removed
    # when it is a direct child of the texttool tag.
    for old_text in subnode.getElementsByTagName('text')[:1]:
        if old_text.parentNode is subnode:
            subnode.removeChild(old_text).unlink()

    # Create the new <text> entry.
    textfoldernode = dom.createElement('text')
    textfoldernode.appendChild(dom.createTextNode(textpath + "/" + name))
    subnode.appendChild(textfoldernode)

    # Write index.meta; the data is already encoded, hence binary mode.
    writefile = open(path + "/index.meta", "wb")
    try:
        writefile.write(dom.toxml().encode('utf-8'))
    finally:
        writefile.close()

    # Register the document.
    return urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s" % path).readlines()
340:
341:
342:
def TextExternalError(text):
    """Tell whether text lacks an XML declaration at its beginning.

    Only the first ten characters are inspected.  The original test could
    never fail: ``re.search`` returns None instead of raising on a miss,
    and the unescaped ``<?`` in its pattern made the '<' optional.

    Returns 0 when "<?xml" occurs within the first ten characters,
    otherwise 1.
    """
    firsts = text[0:10]
    if "<?xml" in firsts:
        return 0
    return 1
351:
def getNewTextPath(path):
    """Create the first unused ``fulltextN`` folder below path.

    N starts at 1 and is increased until no entry of that name exists.
    The folder is created with mode 0755.

    Returns the folder name (without the leading path).
    """
    counter = 1
    while True:
        candidate = path + "/fulltext%i" % counter
        if not os.path.exists(candidate):
            break
        counter += 1
    os.mkdir(candidate)
    os.chmod(candidate, 0o755)
    return "fulltext%i" % counter
359:
def addImages(self, path):
    """Prepare the "add image folder" form for the document at path.

    Stores the path and the list of names already in use in the session
    and returns the result of calling the ``OSAS_addImages`` page template.
    """
    session = self.REQUEST.SESSION
    # TODO: generate the list of existing page image folders instead of
    # this fixed value
    session['existing_names'] = ['pageimg']
    session['path'] = path
    images_page = PageTemplateFile('Products/OSA_system/OSAS_addImages')
    return images_page.__of__(self)()
366:
def addImages2(self):
    """Register a new image folder and render the upload page.

    Folder name and description are taken from the request and kept on
    ``self``.  The folder is registered in the index.meta found under the
    session path with content type 'images'.  Afterwards the session path
    is reduced to the part that follows "/mpiwg" and the ``OSAS_upload2``
    page template is called.
    """
    request = self.REQUEST
    self.image_folder_name = request['folder_name']
    self.content_description = request['content_description']
    self.content_type = 'images'

    session = request.SESSION
    addDirsToIndexMeta(session['path'], self.image_folder_name, self.content_description, self.content_type)

    # from here on the session path is relative to /mpiwg
    session['path'] = re.search(r"/mpiwg(.*)", session['path']).group(1)

    upload_page = PageTemplateFile('Products/OSA_system/OSAS_upload2')
    return upload_page.__of__(self)()
380:
381:
382:
def _append_text_child(dom, parent, tag_name, text):
    """Append ``<tag_name>text</tag_name>`` as last child of parent."""
    child = dom.createElement(tag_name)
    child.appendChild(dom.createTextNode(text))
    parent.appendChild(child)
    return child


def addDirsToIndexMeta(path, folder_name, content_description, content_type):
    """Append a ``<dir>`` entry to the index.meta file in path.

    The entry gets the children ``name``, ``description`` and
    ``content-type`` (in this order) and is appended to the first
    ``resource`` element.  The file is rewritten as UTF-8.

    path                -- directory containing index.meta.
    folder_name         -- text of the ``name`` element.
    content_description -- text of the ``description`` element.
    content_type        -- text of the ``content-type`` element.

    Raises IndexError when index.meta has no ``resource`` element.
    """
    meta_file = path + "/index.meta"
    dom = xml.dom.minidom.parse(meta_file)
    node = dom.getElementsByTagName('resource')[0]

    subnode = dom.createElement('dir')
    _append_text_child(dom, subnode, 'name', folder_name)
    _append_text_child(dom, subnode, 'description', content_description)
    _append_text_child(dom, subnode, 'content-type', content_type)
    node.appendChild(subnode)

    # The document is encoded before writing, so the file is opened in
    # binary mode; the handle is closed even when writing fails.
    writefile = open(meta_file, "wb")
    try:
        writefile.write(dom.toxml().encode('utf-8'))
    finally:
        writefile.close()
410:
def readArchimedesXML(folder):
    """Look up folder in the Archimedes corpus manifest.

    Downloads the manifest, maps the ``dir`` attribute of every ``item``
    element to its ``xml`` attribute (a later item overrides an earlier
    one with the same ``dir``) and returns the entry for folder.

    Returns the ``xml`` attribute value (a URL, according to the original
    docstring) or "" when the manifest has no entry for folder.
    """
    manifest_url = "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpusmanifest"
    manifest = xml.dom.minidom.parseString(urllib.urlopen(manifest_url).read())

    xml_by_dir = {}
    for item in manifest.getElementsByTagName('item'):
        try:
            xml_by_dir[item.attributes['dir'].value] = item.attributes['xml'].value
        except:
            # items lacking one of the two attributes are ignored
            pass

    return xml_by_dir.get(folder, "")
def combineTextImage(self, path):
    """Render the input form for creating the texttool meta tag.

    Sorts the entries of path into image folders, full texts and
    presentation folders using the checks of the ``archive`` module, looks
    up the last component of path in the Archimedes manifest and reads the
    language from index.meta ("" when there is none).  Everything is
    stored in the session before the ``ImageandText`` page template is
    called.
    """
    entries = os.listdir(path)

    texts = []
    imagefolders = []
    presentationfolders = []

    # the last path component is the name looked up in the manifest
    externxml = readArchimedesXML(path.split("/")[-1])

    for entry in entries:
        if archive.isdigilib2(path + "/" + entry):
            imagefolders.append(entry)
        if archive.isFullText(path, entry):
            texts.append(entry)
        if archive.isPresentation(path, entry):
            presentationfolders.append(entry)

    dom = xml.dom.minidom.parse(path + "/index.meta")
    try:
        lang_nodes = dom.getElementsByTagName('lang')
        filelanguage = archive.getText(lang_nodes[0].childNodes)
    except:
        # no language recorded yet
        filelanguage = ""

    session = self.REQUEST.SESSION
    session['isolist'] = getISO()
    session['path'] = path
    session['texts'] = texts
    session['imagefolders'] = imagefolders
    session['presentationfolders'] = presentationfolders
    session['filelanguage'] = filelanguage
    session['externxml'] = externxml

    form_page = PageTemplateFile('Products/OSA_system/ImageandText')
    return form_page.__of__(self)()
468:
469:
470:
def _texttool_child(dom, parent, tag_name, text):
    """Append ``<tag_name>text</tag_name>`` as last child of parent."""
    child = dom.createElement(tag_name)
    child.appendChild(dom.createTextNode(text))
    parent.appendChild(child)
    return child


def _remove_direct_children(parent, tag_name):
    """Remove every direct child element of parent named tag_name."""
    for child in list(parent.childNodes):
        if child.nodeType == child.ELEMENT_NODE and child.tagName == tag_name:
            parent.removeChild(child)


def combineTextImage2(self, path):
    """Create or replace the texttool meta tag of the document at path.

    An existing ``texttool`` element below the first ``meta`` element is
    removed and a new one is built from the request fields ``image``,
    ``text``, ``external``, ``pagebreak`` and ``presentation`` (each only
    when present).  The language from the request field ``lang`` replaces
    the ``lang`` children of the first ``bib`` element; only when there is
    no ``bib`` element is it stored directly below ``resource``.  Afterwards
    index.meta is rewritten, the document is registered through the
    registration CGI and, when an image folder was given, thumbnail
    scaling is started on the remote host.

    NOTE(review): a debug print referencing an undefined name used to make
    the ``bib`` branch fail every time, so files written before this fix
    carry ``lang`` directly below ``resource``.  The source indentation was
    lost; the ``resource`` fallback is assumed to belong to the failure
    path of the ``bib`` branch -- confirm against consumers of index.meta.

    Returns None, or an "ERROR: ..." string when the image path contains
    characters that are not safe to pass to the remote shell.
    """
    request = self.REQUEST
    meta_file = path + "/index.meta"
    dom = xml.dom.minidom.parse(meta_file)
    node = dom.getElementsByTagName('meta')[0]

    # An existing texttool tag is removed and built anew.
    subnodelist = node.getElementsByTagName('texttool')
    if len(subnodelist) != 0:
        node.removeChild(subnodelist[0]).unlink()

    subnode = dom.createElement('texttool')
    _texttool_child(dom, subnode, 'display', 'yes')

    if request.has_key('image'):
        _texttool_child(dom, subnode, 'image', request['image'])

    if request.has_key('text'):
        textfile = os.listdir(path + "/" + request['text'])[0]
        _texttool_child(dom, subnode, 'text', path + "/" + request['text'] + "/" + textfile)

    if request.has_key('external'):  # use the external text instead of a local one
        _texttool_child(dom, subnode, 'text', request.SESSION['externxml'])

    if request.has_key('pagebreak'):
        _texttool_child(dom, subnode, 'pagebreak', request['pagebreak'])

    if request.has_key('presentation'):
        # The folder is listed only when a presentation was chosen; the
        # original listed it unconditionally and failed without the field.
        presentfile = os.listdir(path + "/" + request['presentation'])[0]
        _texttool_child(dom, subnode, 'presentation', request['presentation'] + "/" + presentfile)

    node.appendChild(subnode)

    # Language: prefer the bib element, fall back to resource.
    lang_parent = None
    bibs = dom.getElementsByTagName('bib')
    if len(bibs) > 0:
        lang_parent = bibs[0]
    else:
        resources = dom.getElementsByTagName('resource')
        if len(resources) > 0:
            lang_parent = resources[0]
    if lang_parent is not None:
        _remove_direct_children(lang_parent, 'lang')
        if request.has_key('lang'):
            _texttool_child(dom, lang_parent, 'lang', request['lang'])

    # The document is encoded before writing, hence binary mode.
    writefile = open(meta_file, "wb")
    try:
        writefile.write(dom.toxml().encode('utf-8'))
    finally:
        writefile.close()

    urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s" % path).readlines()

    if request.has_key('image'):
        image_path = re.sub('mpiwg/online/', '', request['path'] + "/" + request['image'])
        # SECURITY: the value comes from the request and ends up in a shell
        # command line (locally and again on the remote host), so only a
        # conservative character set is accepted.
        if not re.match(r'^[A-Za-z0-9_./-]+$', image_path):
            return "ERROR: unsafe image path: %s" % image_path
        os.popen("ssh nausikaa2.rz-berlin.mpg.de /usr/local/mpiwg/scripts/scaleomat.pl %s /mpiwg/temp/online/scaled/thumb 90 &" % image_path)
569:
570: #return self.REQUEST.RESPONSE.redirect(self.REQUEST['URL2']+'?path='+self.REQUEST['path'])
571:
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>