annotate src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 0:c6929e63b0b8

first import
author dwinter
date Wed, 24 Nov 2010 16:52:07 +0100
parents
children fab8e78184fa
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c6929e63b0b8 first import
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.eSciDoc.harvesting;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
2
c6929e63b0b8 first import
dwinter
parents:
diff changeset
3 import java.io.File;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
4 import java.io.FileWriter;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
5 import java.io.IOException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
6 import java.net.MalformedURLException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
7 import java.net.URL;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
8 import java.util.ArrayList;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
9
c6929e63b0b8 first import
dwinter
parents:
diff changeset
10
c6929e63b0b8 first import
dwinter
parents:
diff changeset
11 import org.apache.http.HttpResponse;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
12 import org.apache.log4j.BasicConfigurator;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
13 import org.apache.log4j.Level;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
14 import org.apache.log4j.Logger;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
15 import org.apache.log4j.xml.DOMConfigurator;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
16 import org.jdom.JDOMException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
17
c6929e63b0b8 first import
dwinter
parents:
diff changeset
18 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
19 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
20 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
21 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
22 import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
23 import de.mpiwg.itgroup.eSciDoc.importer.Importer;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
24 import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
25 import de.mpiwg.itgroup.eSciDoc.transformer.Transformer;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
26 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
27
c6929e63b0b8 first import
dwinter
parents:
diff changeset
28 public class ESciDocDataHarvester {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
29
c6929e63b0b8 first import
dwinter
parents:
diff changeset
30 protected Logger logger = Logger.getRootLogger();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
31 protected Importer importer;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
32 protected EScidocBasicHandler connector;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
33 protected Transformer transformer;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
34 private EScidocTools tools;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
35 private String echoContext;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
36 private Logger addedFile = Logger.getLogger("addedFilesLogger");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
37 private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
38
c6929e63b0b8 first import
dwinter
parents:
diff changeset
39
c6929e63b0b8 first import
dwinter
parents:
diff changeset
40 public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
41 this.importer=importer;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
42 this.transformer=transformer;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
43 this.connector=connector;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
44 this.tools=new EScidocTools(connector);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
45 this.echoContext= context;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
46
c6929e63b0b8 first import
dwinter
parents:
diff changeset
47
c6929e63b0b8 first import
dwinter
parents:
diff changeset
48 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
49 public Boolean readObjectsFromInstance(String type) throws Exception{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
50 ArrayList<String> addedObjects = new ArrayList<String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
51 ArrayList<String> notAddedObjects = new ArrayList<String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
52 for (ECHOObject obj: importer.getObjectList(type)){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
53
c6929e63b0b8 first import
dwinter
parents:
diff changeset
54
c6929e63b0b8 first import
dwinter
parents:
diff changeset
55 if (ECHORessource.class.isInstance(obj)){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
56 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
57 if (connector.alreadyExists("/md-records/md-record/admin/archivePath",((ECHORessource)obj).archivePath,echoContext)){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
58 logger.debug("already exist:"+((ECHORessource)obj).archivePath);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
59 continue;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
60 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
61 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
62 logger.debug("already exist error");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
63 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
64 continue;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
65 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
66 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
67
c6929e63b0b8 first import
dwinter
parents:
diff changeset
68 obj.context=echoContext;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
69
c6929e63b0b8 first import
dwinter
parents:
diff changeset
70 String contid=connector.getIDfromPID(obj.pid,echoContext);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
71 if (contid!=null){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
72 System.out.println("------- belongsTo:"+contid);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
73 } else {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
74
c6929e63b0b8 first import
dwinter
parents:
diff changeset
75 eSciDocXmlObject escidocItem = transformer.transform(obj);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
76 logger.info(escidocItem.printXML());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
77 // TODO write PID to back to echo-obj
c6929e63b0b8 first import
dwinter
parents:
diff changeset
78 Boolean result = connector.createItem(escidocItem);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
79 if (result){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
80 addedObjects.add(escidocItem.getESciDocId());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
81 addedFile.debug(escidocItem.getESciDocId()+"\n");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
82 //addedFile.write(escidocItem.getESciDocId()+"\n");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
83 //addedFile.flush();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
84
c6929e63b0b8 first import
dwinter
parents:
diff changeset
85 }else {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
86 notAddedObjects.add(obj.echoUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
87 notAddedFile.debug(obj.echoUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
88 //notAddedFile.write(obj.echoUrl+"\n");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
89 //notAddedFile.flush();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
90 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
91 //if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
92 // logger.info("PID already exists:"+obj);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
93 //} else if (result == ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
94 // logger.info("Object with reference to the same digital object already exists:"+obj);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
95 //}
c6929e63b0b8 first import
dwinter
parents:
diff changeset
96
c6929e63b0b8 first import
dwinter
parents:
diff changeset
97 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
98 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
99 if(logger.getLevel()==Level.DEBUG){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
100 for (String addedObject:addedObjects){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
101 logger.debug(addedObject);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
102 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
103 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
104
c6929e63b0b8 first import
dwinter
parents:
diff changeset
105 // File outFile = new File("/tmp/import.out");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
106 // FileWriter fw = new FileWriter(outFile);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
107 // for (String addedObject:addedObjects){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
108 // fw.write(addedObject+"\n");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
109 // }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
110 // for (String addedObject:notAddedObjects){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
111 // fw.write(addedObject+"\n");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
112 // }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
113 // fw.close();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
114 return true;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
115 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
116
c6929e63b0b8 first import
dwinter
parents:
diff changeset
117 public void releaseAndSubmitObjects(String command,String objectXPath) throws Exception{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
118 for (eSciDocXmlObject obj: connector.getObjectListFromFilterResult(command,objectXPath)){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
119 HttpResponse res = connector.submitAnObject(obj,"first release");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
120 logger.debug(res.getStatusLine());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
121 if (res.getStatusLine().getStatusCode()!=200){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
122 logger.debug("Can not submit:"+obj.getESciDocId());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
123 //res.getEntity().consumeContent(); // necessary to release the conneciton
c6929e63b0b8 first import
dwinter
parents:
diff changeset
124
c6929e63b0b8 first import
dwinter
parents:
diff changeset
125 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
126 res.getEntity().consumeContent(); // necessary to release the conneciton
c6929e63b0b8 first import
dwinter
parents:
diff changeset
127
c6929e63b0b8 first import
dwinter
parents:
diff changeset
128 if (!connector.upDateObject(obj)){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
129 logger.debug("Can not update:"+obj.getESciDocId());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
130 //continue;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
131
c6929e63b0b8 first import
dwinter
parents:
diff changeset
132 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
133
c6929e63b0b8 first import
dwinter
parents:
diff changeset
134
c6929e63b0b8 first import
dwinter
parents:
diff changeset
135 res = connector.releaseAnObject(obj, "first release");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
136 logger.debug(res.getStatusLine());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
137 if (res.getStatusLine().getStatusCode()!=200){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
138 logger.debug("Can not release:"+obj.getESciDocId());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
139 res.getEntity().consumeContent(); // necessary to release the conneciton
c6929e63b0b8 first import
dwinter
parents:
diff changeset
140 continue;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
141 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
142 addedFile.debug("RELEASED:"+obj.getESciDocId());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
143 res.getEntity().consumeContent(); // necessary to release the conneciton
c6929e63b0b8 first import
dwinter
parents:
diff changeset
144 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
145
c6929e63b0b8 first import
dwinter
parents:
diff changeset
146 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
147 public static void main(String[] args) throws Exception{
c6929e63b0b8 first import
dwinter
parents:
diff changeset
148
c6929e63b0b8 first import
dwinter
parents:
diff changeset
149 Logger rl = Logger.getRootLogger();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
150 DOMConfigurator.configure("log4uconf.xml");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
151 rl.setLevel(Level.DEBUG);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
152
c6929e63b0b8 first import
dwinter
parents:
diff changeset
153
c6929e63b0b8 first import
dwinter
parents:
diff changeset
154 EScidocBasicHandler connector = new EScidocBasicHandler("192.168.56.2",8080,"dwinter","weikiki7");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
155 ECHOImporter newimporter = new ECHOImporter(new URL("file:///Users/dwinter/libcoll.rdf"));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
156 ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
157 new ECHOTransformer(),connector,"/ir/context/escidoc:40001");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
158
c6929e63b0b8 first import
dwinter
parents:
diff changeset
159 //hv.readObjectsFromInstance("ECHO_collection");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
160 //hv.readObjectsFromInstance("ECHO_resource");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
161
c6929e63b0b8 first import
dwinter
parents:
diff changeset
162 hv.releaseAndSubmitObjects("/ir/context/escidoc:40001/resources/members","//escidocItem:item");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
163
c6929e63b0b8 first import
dwinter
parents:
diff changeset
164
c6929e63b0b8 first import
dwinter
parents:
diff changeset
165 // newimporter.organizeRessourcesInCollections(connector, "/ir/context/escidoc:1001");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
166 //hv.releaseAndSubmitObjects("/ir/containers","//container:container");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
167 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
168 }