Mercurial > hg > eSciDocImport
comparison src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java @ 3:58b52df9763c
added update functionality if index.meta has changed
author | dwinter |
---|---|
date | Wed, 12 Jan 2011 11:00:14 +0100 |
parents | fab8e78184fa |
children | cb5668b07bfc |
comparison
equal
deleted
inserted
replaced
2:fab8e78184fa | 3:58b52df9763c |
---|---|
1 package de.mpiwg.itgroup.eSciDoc.harvesting; | 1 package de.mpiwg.itgroup.eSciDoc.harvesting; |
2 | 2 |
3 import java.io.File; | |
4 import java.io.FileWriter; | |
5 import java.io.IOException; | 3 import java.io.IOException; |
6 import java.io.InputStream; | 4 import java.io.InputStream; |
7 import java.net.MalformedURLException; | 5 import java.io.UnsupportedEncodingException; |
8 import java.net.URL; | 6 import java.net.URL; |
9 import java.util.ArrayList; | 7 import java.util.ArrayList; |
8 import java.util.Map; | |
10 | 9 |
11 import org.apache.http.HttpEntity; | 10 import org.apache.http.HttpEntity; |
12 import org.apache.http.HttpResponse; | 11 import org.apache.http.HttpResponse; |
13 import org.apache.log4j.BasicConfigurator; | |
14 import org.apache.log4j.Level; | 12 import org.apache.log4j.Level; |
15 import org.apache.log4j.Logger; | 13 import org.apache.log4j.Logger; |
16 import org.apache.log4j.xml.DOMConfigurator; | 14 import org.apache.log4j.xml.DOMConfigurator; |
17 import org.jdom.JDOMException; | 15 import org.jdom.JDOMException; |
18 | 16 |
19 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; | 17 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; |
20 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; | 18 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; |
21 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; | 19 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; |
22 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; | 20 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; |
21 import de.mpiwg.itgroup.eSciDoc.exceptions.ConnectorException; | |
22 import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; | |
23 import de.mpiwg.itgroup.eSciDoc.exceptions.ObjectNotUniqueError; | |
24 import de.mpiwg.itgroup.eSciDoc.exceptions.TransformerException; | |
23 import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; | 25 import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; |
24 import de.mpiwg.itgroup.eSciDoc.importer.Importer; | 26 import de.mpiwg.itgroup.eSciDoc.importer.Importer; |
25 import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; | 27 import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; |
26 import de.mpiwg.itgroup.eSciDoc.transformer.Transformer; | 28 import de.mpiwg.itgroup.eSciDoc.transformer.Transformer; |
27 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; | 29 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; |
28 | 30 |
31 /** | |
32 * @author dwinter | |
33 * Main class for data harvesting from index.meta files into eScidoc | |
34 */ | |
29 public class ESciDocDataHarvester { | 35 public class ESciDocDataHarvester { |
30 static int MAX_REC=1000; | 36 static int MAX_REC=1000; // maximum number of records that are read in or processed in one batch. |
31 //static int MAX_REC=5; | 37 //static int MAX_REC=5; |
32 protected Logger logger = Logger.getRootLogger(); | 38 protected Logger logger = Logger.getRootLogger(); |
33 protected Importer importer; | 39 protected Importer importer; |
34 protected EScidocBasicHandler connector; | 40 protected EScidocBasicHandler connector; |
35 protected Transformer transformer; | 41 protected Transformer transformer; |
36 private EScidocTools tools; | 42 |
37 private String echoContext; | 43 private String echoContext; |
38 private Logger addedFile = Logger.getLogger("addedFilesLogger"); | 44 private Logger addedFile = Logger.getLogger("addedFilesLogger"); |
39 private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); | 45 private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); |
40 | 46 |
47 /** | |
48 * @param importer Importer for dataObjects, describes how to access the objects | |
49 * @param transformer Transformer, generates the eScidocMetaDatasets | |
50 * @param connector connects to the eScidocRepository | |
51 * @param context Escidoc context path, e.g. /ir/context/escidoc:12001 | |
52 */ | |
41 public ESciDocDataHarvester(Importer importer, Transformer transformer, | 53 public ESciDocDataHarvester(Importer importer, Transformer transformer, |
42 EScidocBasicHandler connector, String context) throws IOException { | 54 EScidocBasicHandler connector, String context) { |
43 this.importer = importer; | 55 this.importer = importer; |
44 this.transformer = transformer; | 56 this.transformer = transformer; |
45 this.connector = connector; | 57 this.connector = connector; |
46 this.tools = new EScidocTools(connector); | 58 //this.tools = new EScidocTools(connector); |
47 this.echoContext = context; | 59 this.echoContext = context; |
48 | 60 |
49 } | 61 } |
50 | 62 |
51 public Boolean readObjectsFromInstance(String type) throws Exception { | 63 /** Reads objects into eScidoc, or updates the objects if index.meta has changed. |
64 * @param type restrict the imported objects to a specific type, possible types should be defined in | |
65 * the given importer @see {@link #importer} | |
66 * @return | |
67 * @throws ConnectorException | |
68 * @throws TransformerException | |
69 * @throws ESciDocXmlObjectException | |
70 */ | |
71 public Boolean readObjectsFromInstance(String type) throws ConnectorException, TransformerException, ESciDocXmlObjectException { | |
52 ArrayList<String> addedObjects = new ArrayList<String>(); | 72 ArrayList<String> addedObjects = new ArrayList<String>(); |
53 ArrayList<String> notAddedObjects = new ArrayList<String>(); | 73 ArrayList<String> notAddedObjects = new ArrayList<String>(); |
54 for (ECHOObject obj : importer.getObjectList(type)) { | 74 for (ECHOObject obj : importer.getObjectList(type)) { |
55 | 75 |
56 if (ECHORessource.class.isInstance(obj)) { | 76 if (ECHORessource.class.isInstance(obj)) { |
57 try { | 77 try { |
58 if (connector.alreadyExists( | 78 ECHOObject old; |
59 "/md-records/md-record/admin/archivePath", | 79 try { |
60 ((ECHORessource) obj).archivePath, echoContext)) { | 80 old = connector.alreadyExists( |
81 "/md-records/md-record/admin/archivePath", | |
82 ((ECHORessource) obj).archivePath, echoContext); | |
83 } catch (ObjectNotUniqueError e) { | |
84 // TODO Auto-generated catch block | |
85 e.printStackTrace(); | |
86 continue; | |
87 } | |
88 if (old!=null) { | |
61 logger.debug("already exist:" | 89 logger.debug("already exist:" |
62 + ((ECHORessource) obj).archivePath); | 90 + ((ECHORessource) obj).archivePath); |
91 handleExistingObject(obj,old); | |
63 continue; | 92 continue; |
64 } | 93 } |
65 } catch (Exception e) { | 94 } catch (ConnectorException e) { |
66 logger.debug("already exist error"); | 95 logger.debug("already exist error:"); |
67 e.printStackTrace(); | 96 e.printStackTrace(); |
68 continue; | 97 continue; |
69 } | 98 } |
70 } | 99 } |
71 | 100 |
75 if (contid != null) { | 104 if (contid != null) { |
76 System.out.println("------- belongsTo:" + contid); | 105 System.out.println("------- belongsTo:" + contid); |
77 } else { | 106 } else { |
78 | 107 |
79 eSciDocXmlObject escidocItem = transformer.transform(obj); | 108 eSciDocXmlObject escidocItem = transformer.transform(obj); |
80 logger.info(escidocItem.printXML()); | 109 |
81 // TODO write PID to back to echo-obj | 110 |
82 Boolean result = connector.createItem(escidocItem); | 111 try { |
83 if (result) { | 112 logger.info(escidocItem.printXML()); |
84 addedObjects.add(escidocItem.getESciDocId()); | 113 // TODO write PID to back to echo-obj |
85 addedFile.debug(escidocItem.getESciDocId() + "\n"); | 114 Boolean result = connector.createItem(escidocItem); |
86 // addedFile.write(escidocItem.getESciDocId()+"\n"); | 115 if (result) { |
87 // addedFile.flush(); | 116 addedObjects.add(escidocItem.getESciDocId()); |
88 | 117 addedFile.debug(escidocItem.getESciDocId() + "\n"); |
89 } else { | 118 |
90 notAddedObjects.add(obj.echoUrl); | 119 } else { |
91 notAddedFile.debug(obj.echoUrl); | 120 notAddedObjects.add(obj.echoUrl); |
92 // notAddedFile.write(obj.echoUrl+"\n"); | 121 notAddedFile.debug(obj.echoUrl); |
93 // notAddedFile.flush(); | 122 |
94 } | 123 } |
95 // if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ | 124 |
96 // logger.info("PID already exists:"+obj); | 125 } catch (IOException e) { |
97 // } else if (result == | 126 // TODO Auto-generated catch block |
98 // ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ | 127 e.printStackTrace(); |
99 // logger.info("Object with reference to the same digital object already exists:"+obj); | 128 throw new ESciDocXmlObjectException(); |
100 // } | 129 } catch (JDOMException e) { |
101 | 130 // TODO Auto-generated catch block |
131 e.printStackTrace(); | |
132 throw new ESciDocXmlObjectException(); | |
133 } | |
134 | |
102 } | 135 } |
103 } | 136 } |
104 if (logger.getLevel() == Level.DEBUG) { | 137 if (logger.getLevel() == Level.DEBUG) { |
105 for (String addedObject : addedObjects) { | 138 for (String addedObject : addedObjects) { |
106 logger.debug(addedObject); | 139 logger.debug(addedObject); |
107 } | 140 } |
108 } | 141 } |
109 | 142 |
110 // File outFile = new File("/tmp/import.out"); | |
111 // FileWriter fw = new FileWriter(outFile); | |
112 // for (String addedObject:addedObjects){ | |
113 // fw.write(addedObject+"\n"); | |
114 // } | |
115 // for (String addedObject:notAddedObjects){ | |
116 // fw.write(addedObject+"\n"); | |
117 // } | |
118 // fw.close(); | |
119 return true; | 143 return true; |
144 } | |
145 | |
146 | |
147 | |
148 | |
149 | |
150 /** | |
151 * Deal with existing objects: do nothing if the MD5 of the stored metadata and the MD5 of the metadata on the server are the same; otherwise call {@link #updateObject(ECHOObject, ECHOObject)}. | |
152 * @param objNew | |
153 * @param old | |
154 * @throws TransformerException | |
155 * @throws ESciDocXmlObjectException | |
156 */ | |
157 private void handleExistingObject(ECHOObject objNew, ECHOObject old) throws TransformerException, ESciDocXmlObjectException { | |
158 ECHORessource objNewRes = (ECHORessource)objNew; | |
159 ECHORessource objOldRes = (ECHORessource)old; | |
160 String md5onServer = objNewRes.getIndexMetaMD5onServer(); | |
161 String md5=objOldRes.getIndexMetaMD5stored(); | |
162 if (md5onServer.equals(md5)) | |
163 return; | |
164 else { | |
165 updateObject(objNew, old); | |
166 } | |
167 | |
168 } | |
169 | |
170 private void updateObject(ECHOObject objNew, ECHOObject objOld) throws TransformerException, ESciDocXmlObjectException { | |
171 objNew.context = echoContext; | |
172 eSciDocXmlObject escidocItem = transformer.transform(objNew); | |
173 String lastModificationDateOld = objOld.lastModificationDate; | |
174 escidocItem.setLastModificationDate(lastModificationDateOld); | |
175 try { | |
176 HttpResponse ret = connector.eScidocPut(objOld.eScidocId, EScidocBasicHandler.convertStringToStream(escidocItem.printXML())); | |
177 HttpEntity ent = ret.getEntity(); | |
178 if (ret.getStatusLine().getStatusCode() != 200) { | |
179 logger.debug("Can not update:" + objOld.eScidocId); | |
180 // res.getEntity().consumeContent(); // necessary to release | |
181 // the connection | |
182 ent.consumeContent(); | |
183 | |
184 } | |
185 InputStream restream = ret.getEntity().getContent(); | |
186 logger.debug(EScidocBasicHandler.convertStreamToString(restream)); | |
187 } catch (UnsupportedEncodingException e) { | |
188 // TODO Auto-generated catch block | |
189 e.printStackTrace(); | |
190 } catch (IOException e) { | |
191 // TODO Auto-generated catch block | |
192 e.printStackTrace(); | |
193 } | |
194 | |
195 | |
120 } | 196 } |
121 | 197 |
122 /** | 198 /** |
123 * @param command | 199 * @param command |
124 * @param objectXPath | 200 * @param objectXPath |