0
|
# This tool converts GraphML files created with yEd into a graph, either in dot format or as RDF.
|
|
# TODO: lots of ... The mapping from graph type to ontology should be configurable.
|
|
3 from lxml import etree
|
|
4 import os.path
|
|
5 import os
|
6
|
6 import logging
|
0
|
7
|
|
# XML namespace prefixes passed to every XPath query in this module:
# "graphML" is the GraphML core namespace, "y" the yWorks (yEd) extension.
namespaces = {
    'graphML': 'http://graphml.graphdrawing.org/xmlns',
    'y': 'http://www.yworks.com/xml/graphml',
}
|
|
11
|
|
# All essential information about a node
|
|
class Node:
    """Record holding everything that is known about one graph node.

    The class-level values below are the defaults a fresh node starts with;
    only ``numId`` is set by the constructor.
    """

    # Label shown in yEd. It is also used to recognise equal nodes when more
    # than one graph is merged, so labels must be kept consistent when the
    # graphs are created.
    label = ""

    # ID of the node inside its graph; only unique per graph.
    internalID = ""

    # Reference to an external web page (key="d4").
    externalRef = ""

    # Reference to another graph (key="d4"). External and internal references
    # are told apart by whether the path starts with http.
    internalRef = ""

    # ID of the node; should be unique across all nodes.
    numId = 0

    # Type of the node; -1 is the default for "no type assigned".
    nodeType = -1

    def __init__(self, numId):
        """Create a node that carries the given numeric ID."""
        self.numId = numId
|
|
25
|
|
26 #Kante
|
|
class Edge:
    """Directed edge between two nodes, referenced by their IDs."""

    # Defaults; src and target are overwritten by the constructor.
    src = None
    target = None

    # Type of the edge; -1 is the default for "no type assigned".
    edgeType = -1

    def __init__(self, src, target):
        """Create an edge leading from ``src`` to ``target``."""
        self.src, self.target = src, target
|
|
35
|
|
36
|
|
37 # Erzeugt Ids fuer die Knoten
|
|
class IDDispensor:
    """Hands out consecutive integer IDs; the first ID returned is 1."""

    # Last ID handed out. The class-level 0 is only the starting value: the
    # first call of getID() stores the counter on the instance.
    currentID = 0

    def getID(self):
        """Advance the counter by one and return the new value."""
        nextID = self.currentID + 1
        self.currentID = nextID
        return nextID
|
|
43
|
|
44 # Der eigentliche Graph
|
|
class Graph:
    """Graph of typed nodes and edges read from yEd GraphML files.

    Node and edge types are derived by comparing the layout (fill, border,
    shape, line style, arrows) of every element with the entries of a palette
    file read through :meth:`readPalette`.  The graph can be written as a dot
    file (:meth:`exportAsDot`) or as RDF triples (:meth:`exportAsRDF`).

    Instance attributes:
        dispensor           object with a ``getID()`` method supplying node IDs
        label2Ids           node label -> node ID
        id2nodes            node ID -> node object
        internalId2nodesID  GraphML-internal node ID -> node ID
        edges               set of edge objects
        id2label            node ID -> label (filled by ``merge``)
        edgeStyles          edge type -> (line style attributes, arrow attributes)
        edgeDescription     edge type -> description text from the palette
        nodeStyles          node type -> (style, fill, border, config/shape)
        nodeDescription     node type -> description text from the palette
        partOfGraph         node ID -> ID of the (sub)graph containing it
        isSubGraphOf        subgraph ID -> ID of the enclosing graph
        startGraphId        ID of the top-level graph element
        graphURI            name of the graph, set by the caller

    NOTE(review): the GraphML ``key`` ids ("d4", "d6", "d10" for graphs and
    "d4", "d7", "d9", "d12" for the palette) are hard-coded; yEd assigns them
    per file, so they presumably have to match the files in use -- verify.
    """

    # Class-level defaults, kept so that attribute access on the class itself
    # keeps working.  __init__ gives every instance its own containers so
    # that graphs no longer share (and overwrite) each other's state.
    label2Ids = {}
    internalId2nodesID = {}
    edges = set()
    id2label = {}
    edgeStyles = {}
    edgeDescription = {}
    graphURI = ""
    nodeStyles = {}
    nodeDescription = {}
    partOfGraph = {}
    isSubGraphOf = {}

    # dot attributes per node type / edge type; key -1 is the fallback that
    # is also used for type IDs missing from the table.
    _DOT_NODE_SHAPES = {0: 'style="solid" color="blue"',
                        1: 'style="solid" color="lightblue"',
                        2: 'style="solid" color="blue"',
                        3: 'style="tapered" color="orange"',
                        4: 'style="solid" color="green"',
                        5: 'style="solid" color="sienna"',
                        6: 'style="solid" color="magenta"',
                        -1: 'style="dotted" color="red"'}

    _DOT_EDGE_SHAPES = {0: 'style="dotted" color="blue"',
                        1: 'style="solid"',
                        2: 'style="bold"',
                        3: 'style="tapered"',
                        4: 'style="solid" color="green"',
                        5: 'style="solid" color="sienna"',
                        6: 'style="solid" color="magenta"',
                        -1: 'style="dotted" color="red"'}

    def __init__(self, dispensor):
        """Create an empty graph that draws its node IDs from ``dispensor``."""
        self.dispensor = dispensor
        self.label2Ids = {}
        self.id2nodes = {}
        self.internalId2nodesID = {}
        self.edges = set()
        self.id2label = {}
        self.edgeStyles = {}
        self.edgeDescription = {}
        self.nodeStyles = {}
        self.nodeDescription = {}
        self.partOfGraph = {}
        self.isSubGraphOf = {}
        self.startGraphId = ""

    # ------------------------------------------------------------------
    # small helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_text(value):
        """Return ``value`` as text (unicode on Python 2, str on Python 3).

        Byte strings are decoded as UTF-8.
        """
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return u"%s" % (value,)

    @staticmethod
    def _last_attrib(element, path):
        """Return the attributes of the last element matching ``path``.

        The result is a plain dict, or None when nothing matches.
        """
        found = None
        for match in element.xpath(path, namespaces=namespaces):
            found = dict(match.attrib)
        return found

    @staticmethod
    def _last_text(element, path):
        """Return the text of the last element matching ``path``.

        Returns "" when nothing matches; the text itself may be None.
        """
        text = ""
        for match in element.xpath(path, namespaces=namespaces):
            text = match.text
        return text

    def _node_styles(self, node, dataKey):
        """Collect the layout of ``node`` below ``data[@key=dataKey]``.

        Returns one ``(style, fill, border, config)`` tuple per y:GenericNode
        (style "g", config = its "configuration" attribute) followed by one
        per y:ShapeNode (style "s", config = the "type" of its y:Shape).
        """
        found = []
        for tag, style in (("GenericNode", "g"), ("ShapeNode", "s")):
            path = './graphML:data[@key="%s"]/y:%s' % (dataKey, tag)
            for gn in node.xpath(path, namespaces=namespaces):
                fs = self._last_attrib(gn, './y:Fill')
                bs = self._last_attrib(gn, './y:BorderStyle')
                if style == "g":
                    config = gn.attrib.get("configuration")
                else:
                    shape = self._last_attrib(gn, './y:Shape')
                    config = None if shape is None else shape.get("type")
                found.append((style, fs, bs, config))
        return found

    @staticmethod
    def _type_entry(table, typeId):
        """Return ``table[typeId]``, falling back to the -1 entry."""
        return table.get(typeId, table[-1])

    @classmethod
    def _as_names(cls, value):
        """Return ``value`` (one name or a collection of names) as a text list."""
        if value is None:
            return []
        if isinstance(value, (list, tuple, set, frozenset)):
            return [cls._to_text(item) for item in value]
        return [cls._to_text(value)]

    @classmethod
    def _export_label(cls, node, placeholder):
        """Return the label of ``node`` without newlines, or ``placeholder``."""
        if node.label is None:
            return placeholder
        return cls._to_text(node.label).replace(u"\n", u"")

    @classmethod
    def _dot_id(cls, value):
        """Turn ``value`` into a dot identifier.

        Every character other than ASCII letters, digits and "_" is replaced
        by "_", so file names with dots or spaces become usable IDs.
        """
        import re
        return re.sub(u"[^0-9A-Za-z_]", u"_", cls._to_text(value))

    @staticmethod
    def _dot_quote(text):
        """Escape double quotes for use inside a quoted dot string."""
        return text.replace(u'"', u'\\"')

    @staticmethod
    def _ntriples_literal(text):
        """Escape ``text`` for use inside a quoted N-Triples literal."""
        # The backslash has to be escaped first, otherwise the backslashes
        # added for the other characters would be doubled.
        for raw, escaped in ((u"\\", u"\\\\"), (u'"', u'\\"'), (u"\n", u"\\n"),
                             (u"\r", u"\\r"), (u"\t", u"\\t")):
            text = text.replace(raw, escaped)
        return text

    # ------------------------------------------------------------------
    # reading
    # ------------------------------------------------------------------

    def readGraph(self, graphNode, partOf="main"):
        """Read the nodes and edges of one ``<graph>`` element.

        Nested graphs found inside nodes are read recursively before the
        edges of the enclosing graph.

        graphNode -- lxml element of the ``<graph>``
        partOf    -- ID of the enclosing graph; the default "main" marks the
                     top-level graph, whose ID is stored in ``startGraphId``
        """
        graphId = graphNode.get("id")
        # A graph without id used to raise a NameError further down.
        graphIDString = u"" if graphId is None else self._to_text(graphId)

        if partOf == "main":  # start graph
            self.startGraphId = graphIDString
        else:
            self.isSubGraphOf[graphIDString] = partOf

        for node in graphNode.xpath("./graphML:node", namespaces=namespaces):
            self._read_node(node, graphIDString)

        for edge in graphNode.xpath("./graphML:edge", namespaces=namespaces):
            self._read_edge(edge)

    def _read_node(self, node, graphIDString):
        """Create and register the node for one ``<node>`` element."""
        # The last non-empty NodeLabel wins.  An empty label counts as
        # missing; it used to end up as the literal label "None".
        labelString = None
        labelPath = './graphML:data[@key="d6"]/y:*/y:NodeLabel'
        for label in node.xpath(labelPath, namespaces=namespaces):
            if label.text is None:
                continue
            stripped = self._to_text(label.text).strip()
            if stripped:
                labelString = stripped

        newNode = Node(self.dispensor.getID())
        if labelString is not None:
            newNode.label = labelString
        else:
            newNode.label = "NODE:" + str(newNode.numId)

        nodeId = node.get("id")
        if nodeId is not None:
            newNode.internalID = self._to_text(nodeId)

        for nodeRef in node.xpath('./graphML:data[@key="d4"]', namespaces=namespaces):
            if nodeRef.text is None:
                continue
            ref = nodeRef.text.strip()
            if not ref:
                continue
            # hack (dw): "http" somewhere after the start apparently marks a
            # broken URL, such references are ignored
            if ref.find("http") > 0:
                continue
            if ref.startswith(("http:", "https:", "ftp:")):
                newNode.externalRef = ref
            else:
                newNode.internalRef = ref

        # Derive the type from the layout; the last style element wins.
        typeID = None
        for style, fs, bs, config in self._node_styles(node, "d6"):
            typeID = self.getNodeTypeFromPalette(style, fs, bs, config)
        if typeID is None:
            typeID = -1
        newNode.nodeType = typeID

        self.id2nodes[newNode.numId] = newNode
        self.partOfGraph[newNode.numId] = graphIDString  # node is part of this graph
        if labelString is not None:
            self.label2Ids[labelString] = newNode.numId
        if nodeId is not None:
            self.internalId2nodesID[newNode.internalID] = newNode.numId

        # look for subgraphs
        for graph in node.xpath("./graphML:graph", namespaces=namespaces):
            self.readGraph(graph, graphIDString)

    def _read_edge(self, edge):
        """Create and register the edge for one ``<edge>`` element.

        Edges whose source or target cannot be resolved to a known node are
        skipped with a warning.
        """
        ends = []
        for attribute in ("source", "target"):
            raw = edge.get(attribute)
            internalId = None if raw is None else self._to_text(raw)
            ends.append(self.internalId2nodesID.get(internalId))
        if ends[0] is None or ends[1] is None:
            logging.warning("skipping edge with unknown endpoint: %s -> %s",
                            edge.get("source"), edge.get("target"))
            return

        ls = self._last_attrib(edge, './graphML:data[@key="d10"]/.//y:LineStyle')
        ars = self._last_attrib(edge, './graphML:data[@key="d10"]/.//y:Arrows')

        newEdge = Edge(ends[0], ends[1])
        newEdge.edgeType = self.getTypeFromPalette(ls, ars)
        self.edges.add(newEdge)

    def convertGraphml(self, filename):
        """Convert a GraphML file into a network of nodes and edges.

        Layout is mapped to node and edge types by comparing it with a
        palette (see :meth:`readPalette`).
        """
        logging.info("converting:%s", filename)
        tree = etree.parse(filename)

        # read the main graphs
        for maingraph in tree.xpath('./graphML:graph', namespaces=namespaces):
            self.readGraph(maingraph)

    # ------------------------------------------------------------------
    # palette lookup
    # ------------------------------------------------------------------

    def getNodeTypeFromPalette(self, style, fs, bs, config):
        """Return the node type whose palette entry matches the given layout.

        style  -- "g" (generic node) or "s" (shape node)
        fs, bs -- fill and border attributes (dict-like or None)
        config -- configuration (generic node) or shape type (shape node)

        Returns -1 when no palette entry matches.
        """
        for key, value in self.nodeStyles.items():
            styleVorlage, fsVorlage, bsVorlage, configVorlage = value
            if style != styleVorlage or config != configVorlage:
                continue
            if self.cmpDict(fs, fsVorlage) and self.cmpDict(bs, bsVorlage):
                logging.debug("node type: %s", key)
                return key
        return -1

    def getTypeFromPalette(self, ls, ars):
        """Return the edge type whose palette entry matches line style and arrows.

        Returns -1 when no palette entry matches.
        """
        for key, value in self.edgeStyles.items():
            lsVorlage, arsVorlage = value
            if self.cmpDict(ls, lsVorlage) and self.cmpDict(ars, arsVorlage):
                return key
        return -1

    def cmpDict(self, x, y):
        """Check that every key of ``x`` has the same value in ``y``.

        The comparison is one-sided: keys that occur only in ``y`` are
        ignored.  Returns False when either argument is None.
        """
        if x is None or y is None:
            return False
        return all(y.get(key) == x.get(key) for key in x.keys())

    # ------------------------------------------------------------------
    # export
    # ------------------------------------------------------------------

    def exportAsDot(self, filename, graphName, onlyMs=False, partOfGraph=None, linksToGraph=None):
        """Write the graph as a dot file (UTF-8).

        filename     -- path of the file to write
        graphName    -- name used in the ``digraph`` header
        onlyMs       -- if true, only nodes whose label starts with "Add" are
                        declared (edges are still written for all nodes)
        partOfGraph  -- optional dict node ID -> graph name (or collection of
                        names); adds a yellow edge from the graph to the node
        linksToGraph -- optional iterable of (node ID, graph path) pairs; adds
                        a green edge from the node to the graph named by the
                        last path component
        """
        import io

        text = self._to_text
        dotId = self._dot_id
        quote = self._dot_quote
        lines = [u"digraph %s {\n" % text(graphName)]

        for name, value in self.id2nodes.items():
            label = self._export_label(value, u"EMPTYLABEL")
            if onlyMs and not label.lstrip().startswith(u"Add"):
                continue
            url = text(value.externalRef or u"").strip()
            lines.append(u'%s [label="%s" URL="%s" %s];\n' % (
                name, quote(label), url,
                self._type_entry(self._DOT_NODE_SHAPES, value.nodeType)))

        for edge in self.edges:
            lines.append(u"%s -> %s [%s];\n" % (
                edge.src, edge.target,
                self._type_entry(self._DOT_EDGE_SHAPES, edge.edgeType)))

        # membership of nodes in (sub)graphs and nesting of subgraphs;
        # collect every graph that is referenced so it can be declared
        toGraphs = set()
        for relation in (self.partOfGraph, self.isSubGraphOf):
            for fromNode, toGraph in relation.items():
                lines.append(u'%s -> %s [color="blue"];\n' % (dotId(fromNode), dotId(toGraph)))
                toGraphs.add(toGraph)

        for toGraph in sorted(toGraphs, key=text):
            lines.append(u'%s [label="%s" color="blue" fillcolor="blue" style="filled"];\n' % (
                dotId(toGraph), quote(text(toGraph))))

        if partOfGraph is not None:
            for nodeID, graphNames in partOfGraph.items():
                for graph in self._as_names(graphNames):
                    graphId = u"G_" + dotId(graph)
                    lines.append(u'%s -> %s [color="yellow"];\n' % (graphId, nodeID))
                    lines.append(u'%s [label="%s" color="green" fillcolor="green" style="filled"];\n' % (
                        graphId, quote(graph)))

        if linksToGraph is not None:
            for nodeID, graph in linksToGraph:
                gr = text(graph).split(u"/")[-1]
                logging.debug("link to graph %s (%s)", graph, gr)
                graphId = u"G_" + dotId(gr)
                lines.append(u'%s -> %s [color="green"];\n' % (nodeID, graphId))
                lines.append(u'%s [label="%s" color="green" fillcolor="green" style="filled"];\n' % (
                    graphId, quote(gr)))

        lines.append(u"}")

        with io.open(filename, "w", encoding="utf-8") as out:
            out.writelines(lines)

    def exportAsRDF(self, filename, graphName, onlyMs=False, partOfGraph=None, linksToGraph=None, describe=True):
        """Write the graph as RDF triples, one triple per line (UTF-8).

        filename     -- path of the file to write
        graphName    -- accepted for symmetry with exportAsDot; not used
        onlyMs       -- if true, only nodes whose label starts with "Add" are
                        described (edges are still written for all nodes)
        partOfGraph  -- optional dict node ID -> graph name (or collection of
                        names); written as "is_part_of_mainGraph"
        linksToGraph -- optional iterable of (node ID, graph path) pairs;
                        written as "is_specified_in" when the source node is a
                        Topic and as "see_also" otherwise
        describe     -- if true, external references of nodes are written as
                        "describes" triples
        """
        import io

        base = u"http://ontologies.mpiwg-berlin.mpg.de/research/harriot.owl/"
        ressourceBase = u"http://entities.mpiwg-berlin.mpg.de/research/harriot.owl/"
        rdfsLabel = u"http://www.w3.org/2000/01/rdf-schema#label"
        rdfType = u"http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

        type2NodeShape = {0: base + u"Topic",
                          1: base + u"Topic",
                          2: base + u"Topic",
                          3: base + u"FolioPage",
                          4: base + u"4",
                          5: base + u"RelatedFolioPage",
                          -1: base + u"UNKNOWN"}

        type2EdgeShape = {0: base + u"has_prev_by_pagination",
                          1: base + u"has_conjectural_relation",
                          2: base + u"has_prev_by_conjection",
                          3: base + u"has_prev_by_conjection",
                          4: base + u"result_used_from",
                          5: base + u"result_used_from",
                          -1: base + u"is_related_to"}

        text = self._to_text
        literal = self._ntriples_literal
        labelTriple = u'<%s> <%s> "%s".\n'
        typeTriple = u"<%s> <%s><%s>.\n"
        lines = []

        def entity(identifier):
            # URI of the resource with the given node or graph ID
            return ressourceBase + text(identifier)

        for name, value in self.id2nodes.items():
            label = self._export_label(value, u"EMPTYLABEL3")
            if onlyMs and not label.lstrip().startswith(u"Add"):
                continue

            ressourceURI = entity(name)
            if label != u"":
                lines.append(labelTriple % (ressourceURI, rdfsLabel, literal(label)))
            url = text(value.externalRef or u"").strip()
            if url != u"" and describe:
                lines.append(u"<%s> <%s> <%s>.\n" % (ressourceURI, base + u"describes", url))
            logging.debug("node type: %s", value.nodeType)
            lines.append(typeTriple % (ressourceURI, rdfType,
                                       self._type_entry(type2NodeShape, value.nodeType)))

        for edge in self.edges:
            lines.append(u"<%s><%s><%s>.\n" % (
                entity(edge.src),
                self._type_entry(type2EdgeShape, edge.edgeType),
                entity(edge.target)))

        # membership of nodes in (sub)graphs and nesting of subgraphs;
        # collect every graph that is referenced so it can be typed
        toGraphs = set()
        for predicate, relation in ((u"is_part_of_graph", self.partOfGraph),
                                    (u"is_subGraph_of", self.isSubGraphOf)):
            for fromNode, toGraph in relation.items():
                lines.append(u"<%s> <%s> <%s>.\n" % (entity(fromNode), base + predicate, entity(toGraph)))
                toGraphs.add(toGraph)

        for toGraph in sorted(toGraphs, key=text):
            lines.append(typeTriple % (entity(toGraph), rdfType, base + u"SubGraph"))

        if partOfGraph is not None:
            for nodeID, graphNames in partOfGraph.items():
                for graph in self._as_names(graphNames):
                    ressourceURI = ressourceBase + graph.replace(u" ", u"_")
                    lines.append(u"<%s><%s><%s>.\n" % (
                        entity(nodeID), base + u"is_part_of_mainGraph", ressourceURI))
                    lines.append(labelTriple % (ressourceURI, rdfsLabel, literal(graph)))
                    lines.append(typeTriple % (ressourceURI, rdfType, base + u"Graph"))

        if linksToGraph is not None:
            for nodeID, graph in linksToGraph:
                gr = text(graph).split(u"/")[-1]
                logging.debug("link to graph %s (%s)", graph, gr)
                # The result of replace() used to be thrown away, leaving
                # spaces in the URI.
                ressourceURI = ressourceBase + gr.replace(u" ", u"_")

                # The relation depends on the type of the source node.  The
                # type table used to be queried with the node ID instead of
                # the node's type.
                sourceNode = self.id2nodes.get(nodeID)
                typeSrc = None
                if sourceNode is not None:
                    typeSrc = self._type_entry(type2NodeShape, sourceNode.nodeType)
                if typeSrc == base + u"Topic":
                    relation = base + u"is_specified_in"
                else:
                    relation = base + u"see_also"

                lines.append(u"<%s><%s><%s>.\n" % (entity(nodeID), relation, ressourceURI))
                lines.append(labelTriple % (ressourceURI, rdfsLabel, literal(gr)))
                lines.append(typeTriple % (ressourceURI, rdfType, base + u"Graph"))

        with io.open(filename, "w", encoding="utf-8") as out:
            out.writelines(lines)

    # ------------------------------------------------------------------
    # palette
    # ------------------------------------------------------------------

    def readPalette(self, palettePath):
        """Read the type palette from a GraphML file.

        Every edge and every node style element of the palette becomes one
        type, numbered from 0 in document order (edges and nodes are counted
        separately).  Entries of a previously read palette are discarded.
        """
        palette = etree.parse(palettePath)

        self.edgeStyles.clear()
        self.edgeDescription.clear()
        self.nodeStyles.clear()
        self.nodeDescription.clear()

        # read all edges of the palette; relevant for the classification are
        # line style and arrows below data key d12
        typeNr = 0
        for edge in palette.xpath("//graphML:edge", namespaces=namespaces):
            ls = self._last_attrib(edge, './graphML:data[@key="d12"]/y:GenericEdge/y:LineStyle')
            ars = self._last_attrib(edge, './graphML:data[@key="d12"]/y:GenericEdge/y:Arrows')

            # An edge without line style or arrows keeps its type number but
            # can never match, because cmpDict() rejects None.
            self.edgeDescription[typeNr] = self._last_text(edge, './graphML:data[@key="d9"]')
            self.edgeStyles[typeNr] = (ls, ars)
            typeNr += 1

        typeNr = 0
        for node in palette.xpath("//graphML:node", namespaces=namespaces):
            description = self._last_text(node, './graphML:data[@key="d4"]')
            for nodeStyle in self._node_styles(node, "d7"):
                self.nodeDescription[typeNr] = description
                self.nodeStyles[typeNr] = nodeStyle
                typeNr += 1
|
|
636
|
|
def merge(graphs, dispensor):
    """Merge several graphs into one; nodes with the same label are unified.

    graphs    -- iterable of Graph objects; graphs whose ``graphURI`` is
                 'supermap.graphml' are left out
    dispensor -- object with a ``getID()`` method supplying the IDs of the
                 merged nodes and graphs

    Returns a tuple ``(mg, partOfGraph, linksToGraph)``:
        mg           -- the merged Graph
        partOfGraph  -- dict: new ID of each input graph's start graph ->
                        ``graphURI`` of that input graph
        linksToGraph -- set of (new node ID, internalRef) pairs for every
                        node that references another graph

    The input graphs are not modified: edges are copied with their end
    points translated to the new IDs.
    """
    excluded = ('supermap.graphml',)
    partOfGraph = {}
    linksToGraph = set()
    edges = set()
    mg = Graph(dispensor)

    def lookup_or_new(mapping, key):
        # Return the ID stored for key; a fresh ID is drawn only when the key
        # is unknown, so lookups of known keys no longer consume IDs.
        if key not in mapping:
            mapping[key] = dispensor.getID()
        return mapping[key]

    for g in graphs:
        if g.graphURI in excluded:
            continue

        idalt2neu = {}  # old node ID -> new node ID
        for node in g.id2nodes.values():
            label = node.label
            currentID = lookup_or_new(mg.label2Ids, label)
            mg.id2label[currentID] = label
            idalt2neu[node.numId] = currentID
            mg.id2nodes[currentID] = node

            if node.internalRef != "":
                linksToGraph.add((currentID, node.internalRef))

        for edge in g.edges:
            mergedEdge = Edge(idalt2neu.get(edge.src), idalt2neu.get(edge.target))
            mergedEdge.edgeType = edge.edgeType
            edges.add(mergedEdge)

        graphsOldToNew = {}  # graph ID inside g -> new graph ID
        for nodeID, toGraph in g.partOfGraph.items():
            node = g.id2nodes.get(nodeID)
            if node is None:
                # membership entry without a node; nothing to translate
                continue
            currentID = lookup_or_new(mg.label2Ids, node.label)
            mg.partOfGraph[currentID] = lookup_or_new(graphsOldToNew, toGraph)

        for fromGraph, toGraph in g.isSubGraphOf.items():
            toGraphID = lookup_or_new(graphsOldToNew, toGraph)
            fromGraphID = lookup_or_new(graphsOldToNew, fromGraph)
            mg.isSubGraphOf[fromGraphID] = toGraphID

        # A graph without nodes has no entry for its start graph yet; this
        # used to raise a KeyError.
        startID = lookup_or_new(graphsOldToNew, g.startGraphId)
        partOfGraph[startID] = g.graphURI

    mg.edges = edges
    return mg, partOfGraph, linksToGraph
|
|
711
|
|
if __name__ == '__main__':
    # Converts every GraphML file of one directory, merges the graphs and
    # writes the result as dot, as RDF and as a plain list of labels.
    import io

    dispensor = IDDispensor()

    palettePath = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/graphML2RDF/examples/Manuscripts_3.graphml"
    path = "/Users/dwinter/Documents/Projekte/Europeana/harriot-graphml/Maps_20120626/"

    graphs = set()
    for l in os.listdir(path):
        try:
            g1 = Graph(dispensor)
            g1.readPalette(palettePath)
            g1.convertGraphml(path + l)
            g1.graphURI = l
            graphs.add(g1)
        except Exception:
            # Best effort: one unreadable file must not stop the run, but the
            # reason is logged together with the traceback.
            logging.exception("Can't handle:" + l)

    g, po, lg = merge(graphs, dispensor)

    g.exportAsDot("/tmp/out.dot", "harriot", onlyMs=False, partOfGraph=po, linksToGraph=lg)
    g.exportAsRDF("/tmp/out.rdf", "harriot", onlyMs=False, partOfGraph=po, linksToGraph=lg, describe=True)

    # List of all labels, one quoted label per line.  Written as UTF-8 so
    # that labels with non-ASCII characters are no longer dropped.
    with io.open("/tmp/out.txt", "w", encoding="utf-8") as out2:
        for key in g.label2Ids.keys():
            out2.write(u'"%s"\n' % key)
|
|
756 |