Mercurial > hg > graphML2RDF
comparison graphML2RDF.py @ 0:8190d724dc01
first release
author | dwinter |
---|---|
date | Mon, 23 Jul 2012 09:48:23 +0200 |
parents | |
children | e661aabed2f9 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8190d724dc01 |
---|---|
1 # Dieses Tool konvertiert mit yEd erzeugte GraphML-Dateien in einen Graphen, entweder im dot-Format oder als RDF. | |
2 # TODO: lots of ... Die Zuordnung von Graphentyp zu Ontologie sollte konfigurierbar sein. | |
3 from lxml import etree | |
4 import os.path | |
5 import os | |
6 | |
7 | |
# XML namespace prefixes used by the XPath queries in this module:
# "graphML" for the GraphML core schema, "y" for the yWorks extension.
namespaces = dict(
    graphML='http://graphml.graphdrawing.org/xmlns',
    y='http://www.yworks.com/xml/graphml',
)
11 | |
# All relevant information about a single node.
class Node:
    """A graph node; every field except numId starts at its class default."""

    # Numeric id of the node; should be unique across all nodes.
    numId = 0
    # Type number of the node; -1 is the default.
    nodeType = -1
    # Label shown in yEd. It is also used to identify equal nodes when more
    # than one graph is merged, so labels must be kept consistent when the
    # graphs are drawn.
    label = ""
    # Id of the node inside one graph; only unique per graph.
    internalID = ""
    # Reference to an external web page (key="d4").
    externalRef = ""
    # Reference to another graph (key="d4"); external and internal
    # references are told apart by whether the path starts with http.
    internalRef = ""

    def __init__(self, numId):
        """Create a node that carries the numeric id numId."""
        self.numId = numId
25 | |
# Edge
class Edge:
    """A directed connection from src to target with a type number."""

    # Type number of the edge; -1 is the default.
    edgeType = -1
    src = None
    target = None

    def __init__(self, src, target):
        """Store the two end points of the edge."""
        self.src, self.target = src, target
35 | |
36 | |
# Generates ids for the nodes.
class IDDispensor:
    """Hands out consecutive integer ids, starting with 1."""

    # Last id handed out; instances start from this class default.
    currentID = 0

    def getID(self):
        """Advance the counter by one and return the new value."""
        issued = self.currentID + 1
        self.currentID = issued
        return issued
43 | |
# The actual graph.
class Graph:
    """Network of nodes and edges read from yEd GraphML files.

    Node and edge types are derived by comparing the layout found in a
    GraphML file with a palette loaded through readPalette().

    All containers are created per instance, so two graphs never share
    lookup tables or palettes.

    Attributes:
        dispensor: object whose getID() returns a new numeric node id.
        label2Ids: node label -> numeric node id.
        internalId2nodesID: GraphML-internal node id -> numeric node id.
        id2nodes: numeric node id -> node object.
        id2label: numeric node id -> label (not written by this class).
        edges: set of edge objects.
        edgeStyles, edgeDescription: palette data for edges, by type number.
        nodeStyles, nodeDescription: palette data for nodes, by type number.
        graphURI: identifier of the graph; set by the caller.
    """

    def __init__(self, dispensor):
        """Create an empty graph that draws node ids from dispensor."""
        self.dispensor = dispensor
        self.label2Ids = {}
        self.internalId2nodesID = {}
        self.id2nodes = {}
        self.id2label = {}
        self.edges = set()
        self.edgeStyles = {}
        self.edgeDescription = {}
        self.nodeStyles = {}
        self.nodeDescription = {}
        self.graphURI = ""

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _text(value):
        """Return value as a text string; byte strings are read as UTF-8."""
        if isinstance(value, bytes):
            return value.decode("utf-8", "replace")
        return u"%s" % (value,)

    @staticmethod
    def _escapeLiteral(value):
        """Escape value for use inside a double-quoted N-Triples literal."""
        # The backslash must be handled first so that the escapes added
        # afterwards are not escaped a second time.
        escaped = value.replace(u"\\", u"\\\\")
        escaped = escaped.replace(u'"', u'\\"')
        escaped = escaped.replace(u"\n", u"\\n")
        escaped = escaped.replace(u"\r", u"\\r")
        return escaped.replace(u"\t", u"\\t")

    @staticmethod
    def _lastAttrib(element, path):
        """Return the attributes of the last element matching path, or None."""
        found = element.xpath(path, namespaces=namespaces)
        if found:
            return found[-1].attrib
        return None

    def _readNodeStyles(self, node, dataKey):
        """Yield (style, fill, border, config) for every realizer of node.

        y:GenericNode elements below the data element dataKey are yielded
        first with style "g" and their "configuration" attribute; then
        y:ShapeNode elements with style "s" and the type of their y:Shape.
        """
        prefix = './graphML:data[@key="%s"]/' % dataKey
        for gn in node.xpath(prefix + 'y:GenericNode', namespaces=namespaces):
            yield ("g",
                   self._lastAttrib(gn, './y:Fill'),
                   self._lastAttrib(gn, './y:BorderStyle'),
                   gn.attrib.get("configuration"))
        for sn in node.xpath(prefix + 'y:ShapeNode', namespaces=namespaces):
            shapes = sn.xpath('./y:Shape', namespaces=namespaces)
            shapeType = shapes[-1].attrib.get("type") if shapes else None
            yield ("s",
                   self._lastAttrib(sn, './y:Fill'),
                   self._lastAttrib(sn, './y:BorderStyle'),
                   shapeType)

    def _lookupNodeId(self, internalId):
        """Map a GraphML-internal node id to the numeric id, or None."""
        if internalId is None:
            return None
        return self.internalId2nodesID.get(self._text(internalId))

    # ------------------------------------------------------------------
    # reading
    # ------------------------------------------------------------------

    def convertGrahml(self, filename):
        """Convert a GraphML file into a network of nodes and edges.

        Layout is mapped to node and edge types by comparing it with the
        palette (see readPalette); without a match the type is -1.

        The data keys are fixed: "d4" holds the node reference, "d6" the
        node layout and "d10" the edge layout.

        Args:
            filename: path (or file object) accepted by etree.parse.
        """
        tree = etree.parse(filename)

        for node in tree.xpath("//graphML:node", namespaces=namespaces):
            rawId = node.get("id")
            if rawId is None:
                # Without an id no edge can refer to the node.
                continue

            # The last y:NodeLabel below the node wins.
            # NOTE(review): a label element without text yields the text
            # "None" here, exactly as before - confirm that this is wanted.
            labelString = None
            for label in node.xpath(".//y:NodeLabel", namespaces=namespaces):
                labelString = self._text(label.text).strip()

            newNode = Node(self.dispensor.getID())
            newNode.label = labelString
            newNode.internalID = self._text(rawId)

            for nodeRef in node.xpath('./graphML:data[@key="d4"]',
                                      namespaces=namespaces):
                nodeRefString = nodeRef.text
                if nodeRefString is None:
                    continue
                # Hack (dw): there seem to be broken URLs in the data, so
                # anything with "http" after the first character is ignored.
                if nodeRefString.find("http") > 0:
                    continue
                if nodeRefString.startswith(("http:", "https:", "ftp:")):
                    newNode.externalRef = nodeRefString
                else:
                    newNode.internalRef = nodeRefString

            # The last realizer found decides the type.
            typeID = None
            for style, fs, bs, config in self._readNodeStyles(node, "d6"):
                typeID = self.getNodeTypeFromPalette(style, fs, bs, config)
            if typeID is None:
                typeID = -1
            newNode.nodeType = typeID

            self.id2nodes[newNode.numId] = newNode
            if labelString is not None:
                self.label2Ids[labelString] = newNode.numId
            self.internalId2nodesID[newNode.internalID] = newNode.numId

        for edge in tree.xpath("//graphML:edge", namespaces=namespaces):
            ls = self._lastAttrib(
                edge, './graphML:data[@key="d10"]/.//y:LineStyle')
            ars = self._lastAttrib(
                edge, './graphML:data[@key="d10"]/.//y:Arrows')

            newEdge = Edge(self._lookupNodeId(edge.get("source")),
                           self._lookupNodeId(edge.get("target")))
            newEdge.edgeType = self.getTypeFromPalette(ls, ars)
            self.edges.add(newEdge)

    def getNodeTypeFromPalette(self, style, fs, bs, config):
        """Return the palette type number matching a node layout, or -1.

        A palette entry matches when style and config are equal and both
        the fill attributes fs and the border attributes bs pass cmpDict.
        Entries are tried in ascending type number; the matching number is
        also printed.
        """
        for key, value in sorted(self.nodeStyles.items()):
            styleVorlage, fsVorlage, bsVorlage, configVorlage = value
            if style != styleVorlage or config != configVorlage:
                continue
            if self.cmpDict(fs, fsVorlage) and self.cmpDict(bs, bsVorlage):
                print(key)
                return key
        return -1

    def getTypeFromPalette(self, ls, ars):
        """Return the palette type number matching an edge layout, or -1.

        ls are the line style attributes and ars the arrow attributes;
        both must pass cmpDict against the palette entry. Entries are
        tried in ascending type number.
        """
        for key, value in sorted(self.edgeStyles.items()):
            lsVorlage, arsVorlage = value
            if self.cmpDict(ls, lsVorlage) and self.cmpDict(ars, arsVorlage):
                return key
        return -1

    def cmpDict(self, x, y):
        """Check that every entry of x has the same value in y.

        Keys that exist only in y are ignored, so the test is not
        symmetric; an empty x matches any y.

        Returns:
            False if x or y is None or a value differs, otherwise True.
        """
        if x is None or y is None:
            return False
        return all(y.get(key, None) == x.get(key) for key in x.keys())

    # ------------------------------------------------------------------
    # writing
    # ------------------------------------------------------------------

    def exportAsDot(self, filename, graphName, onlyMs=False, partOfGraph=None, linksToGraph=None):
        """Write the graph to filename in Graphviz DOT syntax (UTF-8).

        Args:
            filename: path of the file to create or overwrite.
            graphName: name used in the "digraph" header.
            onlyMs: if true, only nodes whose label starts with "Add" are
                written; edges are written in any case.
            partOfGraph: optional mapping node id -> iterable of graph
                names; for each name a node "G_<name>" with an edge to the
                node is written.
            linksToGraph: optional iterable of (node id, graph reference)
                pairs; an edge from the node to "G_<last path segment>" is
                written for each pair.
        """
        import io

        type2NodeShape = {0: 'style="solid" color="blue"',
                          1: 'style="solid" color="lightblue"',
                          2: 'style="solid" color="blue"',
                          3: 'style="tapered" color="orange"',
                          4: 'style="solid" color="green"',
                          5: 'style="solid" color="sienna"',
                          6: 'style="solid" color="magenta"',
                          -1: 'style="dotted" color="red"'
                          }

        type2EdgeShape = {0: 'style="dotted" color="blue"',
                          1: 'style="solid"',
                          2: 'style="bold"',
                          3: 'style="tapered"',
                          4: 'style="solid" color="green"',
                          5: 'style="solid" color="sienna"',
                          6: 'style="solid" color="magenta"',
                          -1: 'style="dotted" color="red"'
                          }

        text = self._text
        graphNodeLine = (u'G_%s [label="%s" color="green" '
                         u'fillcolor="green" style="filled"];\n')

        # The file is written as UTF-8 text so that labels with non-ASCII
        # characters are kept instead of being replaced by their repr().
        with io.open(filename, "w", encoding="utf-8") as out:
            out.write(u"digraph %s {" % text(graphName))

            for key, value in self.id2nodes.items():
                # A node without any label is written with an empty label.
                label = text(value.label or u"").replace(u"\n", u"")
                if onlyMs and not label.lstrip().startswith(u"Add"):
                    continue
                # Types without an entry use the style of type -1.
                shape = type2NodeShape.get(value.nodeType, type2NodeShape[-1])
                out.write(u'%s [label="%s" URL="%s" %s];\n' % (
                    text(key),
                    label.replace(u'"', u'\\"'),  # keep the quoted string closed
                    text(value.externalRef),
                    shape))

            for edge in self.edges:
                shape = type2EdgeShape.get(edge.edgeType, type2EdgeShape[-1])
                out.write(u"%s -> %s [%s];\n" % (
                    text(edge.src), text(edge.target), shape))

            if partOfGraph is not None:
                for nodeID, graphList in partOfGraph.items():
                    for graph in graphList:
                        graphLabel = text(graph)
                        graphNode = graphLabel.replace(u".", u"_")
                        out.write(u'G_%s -> %s [color="yellow"];\n' % (
                            graphNode, text(nodeID)))
                        out.write(graphNodeLine % (graphNode, graphLabel))

            if linksToGraph is not None:
                for nodeID, graph in linksToGraph:
                    graph = text(graph)
                    print(graph)
                    gr = graph.split(u"/")[-1]
                    print(gr)
                    graphNode = gr.replace(u".", u"_")
                    out.write(u'%s -> G_%s [color="green"];\n' % (
                        text(nodeID), graphNode))
                    out.write(graphNodeLine % (graphNode, gr))

            out.write(u"}")

    def exportAsRDF(self, filename, graphName, onlyMs=False, partOfGraph=None, linksToGraph=None):
        """Write the graph to filename as RDF triples (UTF-8), one per line.

        Every resource and every predicate defined here lives below
        http://example.org/harriotOnt/.

        Args:
            filename: path of the file to create or overwrite.
            graphName: accepted for symmetry with exportAsDot; not used.
            onlyMs: if true, only nodes whose label starts with "Add" are
                written; edges are written in any case.
            partOfGraph: optional mapping node id -> iterable of graph
                names; writes an "is_part_of" triple per graph.
            linksToGraph: optional iterable of (node id, graph reference)
                pairs; writes "is_specified_in" when the node is a Topic
                and "see_also" otherwise.
        """
        import io

        base = u"http://example.org/harriotOnt/"
        type2NodeShape = {0: base + "Topic",
                          1: base + "Topic",
                          2: base + "Topic",
                          3: base + "FolioPage",
                          4: base + "4",
                          5: base + "RelatedFolioPage",
                          -1: base + "UNKNOWN"}

        type2EdgeShape = {0: base + "has_prev_by_pagination",
                          1: base + "has_conjectural_relation",
                          2: base + "has_prev_by_conjection",
                          3: base + "has_prev_by_conjection",
                          4: base + "result_used_from",
                          5: base + "result_used_from",
                          -1: base + "is_related_to"}

        text = self._text
        labelLine = u'<%s> <http://www.w3.org/2000/01/rdf-schema#label> "%s".\n'
        typeLine = u'<%s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type><%s>.\n'
        tripleLine = u"<%s><%s><%s>.\n"

        with io.open(filename, "w", encoding="utf-8") as out:
            for key, value in self.id2nodes.items():
                # A node without any label gets no label triple.
                label = text(value.label or u"").replace(u"\n", u"")
                url = value.externalRef

                if onlyMs and not label.lstrip().startswith(u"Add"):
                    continue

                ressourceURI = base + text(key)
                if label != "":
                    out.write(labelLine % (
                        ressourceURI, self._escapeLiteral(label)))
                if url != "":
                    out.write(u"<%s> <%s> <%s>.\n" % (
                        ressourceURI, base + "describes", text(url)))
                print(value.nodeType)
                # Types without an entry are written as UNKNOWN.
                out.write(typeLine % (
                    ressourceURI,
                    type2NodeShape.get(value.nodeType, type2NodeShape[-1])))

            for edge in self.edges:
                out.write(tripleLine % (
                    base + text(edge.src),
                    type2EdgeShape.get(edge.edgeType, type2EdgeShape[-1]),
                    base + text(edge.target)))

            if partOfGraph is not None:
                for nodeID, graphList in partOfGraph.items():
                    for graph in graphList:
                        graphLabel = text(graph)
                        ressourceURI = base + graphLabel
                        out.write(tripleLine % (
                            base + text(nodeID),
                            base + "is_part_of",
                            ressourceURI))
                        out.write(labelLine % (
                            ressourceURI, self._escapeLiteral(graphLabel)))
                        out.write(typeLine % (
                            ressourceURI, base + "HarriotGraph"))

            if linksToGraph is not None:
                for nodeID, graph in linksToGraph:
                    graph = text(graph)
                    print(graph)
                    gr = graph.split(u"/")[-1]
                    print(gr)
                    ressourceURI = base + gr

                    # The relation depends on the type of the linking node,
                    # so the node has to be looked up by its id first.
                    srcNode = self.id2nodes.get(nodeID)
                    srcType = -1 if srcNode is None else srcNode.nodeType
                    typeSrc = type2NodeShape.get(srcType, type2NodeShape[-1])
                    if typeSrc == base + "Topic":
                        relation = "is_specified_in"
                    else:
                        relation = "see_also"

                    # Like every other predicate the relation is an
                    # absolute URI below base.
                    out.write(tripleLine % (
                        base + text(nodeID), base + relation, ressourceURI))
                    out.write(labelLine % (
                        ressourceURI, self._escapeLiteral(gr)))
                    out.write(typeLine % (
                        ressourceURI, base + "HarriotGraph"))

    def readPalette(self, palettePath):
        """Read the palette that assigns type numbers to layouts.

        Edges and nodes are numbered separately, each starting at 0, in
        the order in which they appear in the palette file. The data keys
        are fixed: "d12" (edge layout), "d9" (edge description), "d7"
        (node layout) and "d4" (node description).

        A palette edge without line style or arrows is stored with None
        for the missing part and therefore never matches.

        Args:
            palettePath: path (or file object) accepted by etree.parse.
        """
        palette = etree.parse(palettePath)

        # Load all edges of the palette; line style and arrows decide the
        # classification.
        typeNr = 0
        for edge in palette.xpath("//graphML:edge", namespaces=namespaces):
            ls = self._lastAttrib(
                edge, './graphML:data[@key="d12"]/y:GenericEdge/y:LineStyle')
            ars = self._lastAttrib(
                edge, './graphML:data[@key="d12"]/y:GenericEdge/y:Arrows')

            descriptions = edge.xpath('./graphML:data[@key="d9"]',
                                      namespaces=namespaces)
            ds = descriptions[-1].text if descriptions else ""

            self.edgeDescription[typeNr] = ds
            self.edgeStyles[typeNr] = (ls, ars)
            typeNr += 1

        typeNr = 0
        for node in palette.xpath("//graphML:node", namespaces=namespaces):
            descriptions = node.xpath('./graphML:data[@key="d4"]',
                                      namespaces=namespaces)
            ds = descriptions[-1].text if descriptions else ""

            # Every realizer of the node becomes a type of its own.
            for nodeStyle in self._readNodeStyles(node, "d7"):
                self.nodeDescription[typeNr] = ds
                self.nodeStyles[typeNr] = nodeStyle
                typeNr += 1
545 | |
def merge(graphs, dispensor):
    """Merge several graphs into one; nodes with equal labels are unified.

    Graphs whose graphURI is 'supermap.graphml' are skipped. The input
    graphs are left untouched: the merged graph gets new edge objects, the
    node objects themselves are shared with the input graphs.

    Args:
        graphs: iterable of Graph objects.
        dispensor: id source; getID() is called once per new label.

    Returns:
        A tuple (mergedGraph, partOfGraph, linksToGraph):
        mergedGraph is a new Graph, partOfGraph maps a merged node id to
        the set of graphURIs the node occurs in, and linksToGraph is a set
        of (merged node id, internalRef) pairs for all nodes that refer to
        another graph.
    """
    skippedGraphs = ['supermap.graphml']
    partOfGraph = {}
    linksToGraph = set()
    mergedEdges = set()

    mg = Graph(dispensor)
    for g in graphs:
        if g.graphURI in skippedGraphs:
            continue

        # Maps the node ids of this graph to the ids in the merged graph.
        idalt2neu = {}
        for node in g.id2nodes.values():
            label = node.label
            # Reuse the id if the label is already known; only a new label
            # takes a fresh id from the dispensor.
            if label in mg.label2Ids:
                currentID = mg.label2Ids[label]
            else:
                currentID = dispensor.getID()
                mg.label2Ids[label] = currentID

            mg.id2label[currentID] = label
            idalt2neu[node.numId] = currentID
            mg.id2nodes[currentID] = node

            if node.internalRef != "":
                linksToGraph.add((currentID, node.internalRef))

            partOfGraph.setdefault(currentID, set()).add(g.graphURI)

        for edge in g.edges:
            # A copy is created so that the end points of the source
            # graph's own edges keep their original ids.
            mergedEdge = Edge(idalt2neu.get(edge.src),
                              idalt2neu.get(edge.target))
            mergedEdge.edgeType = edge.edgeType
            mergedEdges.add(mergedEdge)

    mg.edges = mergedEdges
    return mg, partOfGraph, linksToGraph
588 | |
if __name__ == '__main__':
    # Converts every file of one directory, merges the results and writes
    # the merged graph as DOT and RDF plus a plain list of all labels.
    import io

    palettePath = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/graphML2RDF/examples/Manuscripts_3.graphml"
    path = "/Users/dwinter/Documents/Projekte/Europeana/harriot-graphml/Maps_20120523/"

    dispensor = IDDispensor()

    # A sorted list keeps the order of the graphs, and with it the ids
    # assigned while merging, the same on every run.
    graphs = []
    for l in sorted(os.listdir(path)):
        g1 = Graph(dispensor)
        g1.readPalette(palettePath)
        g1.convertGrahml(os.path.join(path, l))
        g1.graphURI = l
        graphs.append(g1)

    g, po, lg = merge(graphs, dispensor)

    g.exportAsDot("/tmp/out.dot", "harriot", onlyMs=False, partOfGraph=po, linksToGraph=lg)
    g.exportAsRDF("/tmp/out.rdf", "harriot", onlyMs=False, partOfGraph=po, linksToGraph=lg)

    # Written as UTF-8 so that labels with non-ASCII characters are kept.
    with io.open("/tmp/out.txt", "w", encoding="utf-8") as out2:
        for key in g.label2Ids:
            if key is None:
                # Nodes without a label have nothing to list.
                continue
            out2.write(u'"%s"\n' % (key,))
628 |