version 1.3, 2004/01/07 15:29:01
|
version 1.6, 2004/04/14 21:38:02
|
Line 2
|
Line 2
|
import xml.dom.minidom |
import xml.dom.minidom |
import os.path |
import os.path |
import re |
import re |
|
import urllib |
|
|
|
|
|
def readArchimedesXML(folder):
    """Return the manifest ``xml`` value registered for ``folder``.

    Downloads the Archimedes corpus manifest, collects the ``dir`` ->
    ``xml`` attribute pairs of all ``<item>`` elements and looks
    ``folder`` up among the ``dir`` values.

    Parameters:
        folder: value to match against the ``dir`` attribute of the
            manifest items.

    Returns:
        The ``xml`` attribute value of the matching item, or the empty
        string when ``folder`` is not listed or when downloading,
        parsing or the lookup fails for any reason.
    """
    manifestUrl = "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/toc.cgi?step=xmlcorpusmanifest"
    try:
        response = urllib.urlopen(manifestUrl)
        try:
            manifest = response.read()
        finally:
            # Always release the connection, even if reading fails.
            response.close()

        dom = xml.dom.minidom.parseString(manifest)

        xmlByDir = {}
        for item in dom.getElementsByTagName('item'):
            # Items lacking either attribute are skipped silently.
            if item.hasAttribute('dir') and item.hasAttribute('xml'):
                xmlByDir[item.getAttribute('dir')] = item.getAttribute('xml')

        return xmlByDir.get(folder, "")
    except Exception:
        # Deliberate best effort: callers get "" instead of an error, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return ""
|
|
|
|
|
|
def getText(nodelist): |
def getText(nodelist): |
|
|
Line 51 def changeNodesInIndexMeta(path,node,sub
|
Line 79 def changeNodesInIndexMeta(path,node,sub
|
"""nothing""" |
"""nothing""" |
|
|
namenode=dom.createElement(re.sub(r' ','-',subnode)) |
namenode=dom.createElement(re.sub(r' ','-',subnode)) |
namenodetext=dom.createTextNode(subnodes[subnode].decode("utf8")) |
namenodetext=dom.createTextNode(unicode(subnodes[subnode],"utf8")) |
namenode.appendChild(namenodetext) |
namenode.appendChild(namenodetext) |
root.appendChild(namenode) |
root.appendChild(namenode) |
|
|
Line 71 def changeNodesInIndexMeta(path,node,sub
|
Line 99 def changeNodesInIndexMeta(path,node,sub
|
for subnode in subnodes.keys(): |
for subnode in subnodes.keys(): |
|
|
namenode=dom.createElement(re.sub(r' ','-',subnode)) |
namenode=dom.createElement(re.sub(r' ','-',subnode)) |
namenodetext=dom.createTextNode(subnodes[subnode].decode("utf8")) |
namenodetext=dom.createTextNode(unicode(subnodes[subnode],"utf8")) |
namenode.appendChild(namenodetext) |
namenode.appendChild(namenodetext) |
nodeNew.appendChild(namenode) |
nodeNew.appendChild(namenode) |
|
|