0
|
1 package de.mpiwg.itgroup.eSciDoc.harvesting;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.io.FileWriter;
|
|
5 import java.io.IOException;
|
|
6 import java.net.MalformedURLException;
|
|
7 import java.net.URL;
|
|
8 import java.util.ArrayList;
|
|
9
|
|
10
|
|
11 import org.apache.http.HttpResponse;
|
|
12 import org.apache.log4j.BasicConfigurator;
|
|
13 import org.apache.log4j.Level;
|
|
14 import org.apache.log4j.Logger;
|
|
15 import org.apache.log4j.xml.DOMConfigurator;
|
|
16 import org.jdom.JDOMException;
|
|
17
|
|
18 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
|
|
19 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
|
|
20 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
|
|
21 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
|
|
22 import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter;
|
|
23 import de.mpiwg.itgroup.eSciDoc.importer.Importer;
|
|
24 import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer;
|
|
25 import de.mpiwg.itgroup.eSciDoc.transformer.Transformer;
|
|
26 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
|
|
27
|
|
28 public class ESciDocDataHarvester {
|
|
29
|
|
30 protected Logger logger = Logger.getRootLogger();
|
|
31 protected Importer importer;
|
|
32 protected EScidocBasicHandler connector;
|
|
33 protected Transformer transformer;
|
|
34 private EScidocTools tools;
|
|
35 private String echoContext;
|
|
36 private Logger addedFile = Logger.getLogger("addedFilesLogger");
|
|
37 private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger");
|
|
38
|
|
39
|
|
40 public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException{
|
|
41 this.importer=importer;
|
|
42 this.transformer=transformer;
|
|
43 this.connector=connector;
|
|
44 this.tools=new EScidocTools(connector);
|
|
45 this.echoContext= context;
|
|
46
|
|
47
|
|
48 }
|
|
49 public Boolean readObjectsFromInstance(String type) throws Exception{
|
|
50 ArrayList<String> addedObjects = new ArrayList<String>();
|
|
51 ArrayList<String> notAddedObjects = new ArrayList<String>();
|
|
52 for (ECHOObject obj: importer.getObjectList(type)){
|
|
53
|
|
54
|
|
55 if (ECHORessource.class.isInstance(obj)){
|
|
56 try {
|
|
57 if (connector.alreadyExists("/md-records/md-record/admin/archivePath",((ECHORessource)obj).archivePath,echoContext)){
|
|
58 logger.debug("already exist:"+((ECHORessource)obj).archivePath);
|
|
59 continue;
|
|
60 }
|
|
61 } catch (Exception e) {
|
|
62 logger.debug("already exist error");
|
|
63 e.printStackTrace();
|
|
64 continue;
|
|
65 }
|
|
66 }
|
|
67
|
|
68 obj.context=echoContext;
|
|
69
|
|
70 String contid=connector.getIDfromPID(obj.pid,echoContext);
|
|
71 if (contid!=null){
|
|
72 System.out.println("------- belongsTo:"+contid);
|
|
73 } else {
|
|
74
|
|
75 eSciDocXmlObject escidocItem = transformer.transform(obj);
|
|
76 logger.info(escidocItem.printXML());
|
|
77 // TODO write PID to back to echo-obj
|
|
78 Boolean result = connector.createItem(escidocItem);
|
|
79 if (result){
|
|
80 addedObjects.add(escidocItem.getESciDocId());
|
|
81 addedFile.debug(escidocItem.getESciDocId()+"\n");
|
|
82 //addedFile.write(escidocItem.getESciDocId()+"\n");
|
|
83 //addedFile.flush();
|
|
84
|
|
85 }else {
|
|
86 notAddedObjects.add(obj.echoUrl);
|
|
87 notAddedFile.debug(obj.echoUrl);
|
|
88 //notAddedFile.write(obj.echoUrl+"\n");
|
|
89 //notAddedFile.flush();
|
|
90 }
|
|
91 //if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){
|
|
92 // logger.info("PID already exists:"+obj);
|
|
93 //} else if (result == ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){
|
|
94 // logger.info("Object with reference to the same digital object already exists:"+obj);
|
|
95 //}
|
|
96
|
|
97 }
|
|
98 }
|
|
99 if(logger.getLevel()==Level.DEBUG){
|
|
100 for (String addedObject:addedObjects){
|
|
101 logger.debug(addedObject);
|
|
102 }
|
|
103 }
|
|
104
|
|
105 // File outFile = new File("/tmp/import.out");
|
|
106 // FileWriter fw = new FileWriter(outFile);
|
|
107 // for (String addedObject:addedObjects){
|
|
108 // fw.write(addedObject+"\n");
|
|
109 // }
|
|
110 // for (String addedObject:notAddedObjects){
|
|
111 // fw.write(addedObject+"\n");
|
|
112 // }
|
|
113 // fw.close();
|
|
114 return true;
|
|
115 }
|
|
116
|
|
117 public void releaseAndSubmitObjects(String command,String objectXPath) throws Exception{
|
|
118 for (eSciDocXmlObject obj: connector.getObjectListFromFilterResult(command,objectXPath)){
|
|
119 HttpResponse res = connector.submitAnObject(obj,"first release");
|
|
120 logger.debug(res.getStatusLine());
|
|
121 if (res.getStatusLine().getStatusCode()!=200){
|
|
122 logger.debug("Can not submit:"+obj.getESciDocId());
|
|
123 //res.getEntity().consumeContent(); // necessary to release the conneciton
|
|
124
|
|
125 }
|
|
126 res.getEntity().consumeContent(); // necessary to release the conneciton
|
|
127
|
|
128 if (!connector.upDateObject(obj)){
|
|
129 logger.debug("Can not update:"+obj.getESciDocId());
|
|
130 //continue;
|
|
131
|
|
132 }
|
|
133
|
|
134
|
|
135 res = connector.releaseAnObject(obj, "first release");
|
|
136 logger.debug(res.getStatusLine());
|
|
137 if (res.getStatusLine().getStatusCode()!=200){
|
|
138 logger.debug("Can not release:"+obj.getESciDocId());
|
|
139 res.getEntity().consumeContent(); // necessary to release the conneciton
|
|
140 continue;
|
|
141 }
|
|
142 addedFile.debug("RELEASED:"+obj.getESciDocId());
|
|
143 res.getEntity().consumeContent(); // necessary to release the conneciton
|
|
144 }
|
|
145
|
|
146 }
|
|
147 public static void main(String[] args) throws Exception{
|
|
148
|
|
149 Logger rl = Logger.getRootLogger();
|
|
150 DOMConfigurator.configure("log4uconf.xml");
|
|
151 rl.setLevel(Level.DEBUG);
|
|
152
|
|
153
|
|
154 EScidocBasicHandler connector = new EScidocBasicHandler("192.168.56.2",8080,"dwinter","weikiki7");
|
|
155 ECHOImporter newimporter = new ECHOImporter(new URL("file:///Users/dwinter/libcoll.rdf"));
|
|
156 ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter,
|
|
157 new ECHOTransformer(),connector,"/ir/context/escidoc:40001");
|
|
158
|
|
159 //hv.readObjectsFromInstance("ECHO_collection");
|
|
160 //hv.readObjectsFromInstance("ECHO_resource");
|
|
161
|
|
162 hv.releaseAndSubmitObjects("/ir/context/escidoc:40001/resources/members","//escidocItem:item");
|
|
163
|
|
164
|
|
165 // newimporter.organizeRessourcesInCollections(connector, "/ir/context/escidoc:1001");
|
|
166 //hv.releaseAndSubmitObjects("/ir/containers","//container:container");
|
|
167 }
|
|
168 }
|