annotate src/de/mpiwg/itgroup/eSciDoc/importer/ECHOImporter.java @ 0:c6929e63b0b8

first import
author dwinter
date Wed, 24 Nov 2010 16:52:07 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c6929e63b0b8 first import
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.eSciDoc.importer;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
2
c6929e63b0b8 first import
dwinter
parents:
diff changeset
3 import java.io.File;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
4 import java.io.FileWriter;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
5 import java.io.IOException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
6 import java.io.InputStream;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
7 import java.io.InputStreamReader;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
8 import java.io.StringReader;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
9 import java.lang.reflect.Array;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
10 import java.net.URI;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
11 import java.net.URISyntaxException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
12 import java.net.URL;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
13 import java.util.ArrayList;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
14 import java.util.HashMap;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
15 import java.util.List;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
16
c6929e63b0b8 first import
dwinter
parents:
diff changeset
17 import org.apache.http.HttpEntity;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
18 import org.apache.http.HttpResponse;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
19 import org.apache.http.client.ClientProtocolException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
20 import org.apache.http.client.HttpClient;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
21 import org.apache.http.client.methods.HttpGet;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
22 import org.apache.http.impl.client.DefaultHttpClient;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
23 import org.apache.log4j.Level;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
24 import org.apache.log4j.Logger;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
25 import org.jdom.Attribute;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
26 import org.jdom.Document;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
27 import org.jdom.Element;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
28 import org.jdom.JDOMException;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
29 import org.jdom.Namespace;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
30 import org.jdom.input.SAXBuilder;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
31 import org.jdom.output.XMLOutputter;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
32 import org.jdom.xpath.XPath;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
33 import org.w3c.dom.Entity;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
34
c6929e63b0b8 first import
dwinter
parents:
diff changeset
35 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
36 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
37 import de.mpiwg.itgroup.eSciDoc.Tools.Html2Text;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
38 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOCollection;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
39 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
40 import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
41 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
42
c6929e63b0b8 first import
dwinter
parents:
diff changeset
43 public class ECHOImporter implements Importer {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
44
c6929e63b0b8 first import
dwinter
parents:
diff changeset
45 private Logger logger = Logger.getRootLogger();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
46 private static long MAX_RES = 1000000L; // for debugging
c6929e63b0b8 first import
dwinter
parents:
diff changeset
47 private URL instanceUrl;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
48 private String collectionCMM = "/cmm/content-model/escidoc:11004";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
49
c6929e63b0b8 first import
dwinter
parents:
diff changeset
/**
 * Creates an importer bound to one RDF source.
 *
 * @param url
 *            location the RDF document is later loaded from
 */
public ECHOImporter(URL url) {
    instanceUrl = url;
}
c6929e63b0b8 first import
dwinter
parents:
diff changeset
53
c6929e63b0b8 first import
dwinter
parents:
diff changeset
54 @Override
c6929e63b0b8 first import
dwinter
parents:
diff changeset
55 public Iterable<ECHOObject> getObjectList(String type) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
56
c6929e63b0b8 first import
dwinter
parents:
diff changeset
57 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
58 return getObjectListfromRDF(type);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
59 } catch (JDOMException e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
60 // TODO Auto-generated catch block
c6929e63b0b8 first import
dwinter
parents:
diff changeset
61 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
62 } catch (IOException e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
63 // TODO Auto-generated catch block
c6929e63b0b8 first import
dwinter
parents:
diff changeset
64 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
65 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
66 return new ArrayList<ECHOObject>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
67 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
68
c6929e63b0b8 first import
dwinter
parents:
diff changeset
69 private ArrayList<ECHOObject> getObjectListfromRDF(String type)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
70 throws JDOMException, IOException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
71
c6929e63b0b8 first import
dwinter
parents:
diff changeset
72 ArrayList<ECHOObject> ret = new ArrayList<ECHOObject>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
73
c6929e63b0b8 first import
dwinter
parents:
diff changeset
74 SAXBuilder builder = new SAXBuilder();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
75
c6929e63b0b8 first import
dwinter
parents:
diff changeset
76 Document doc = builder.build(instanceUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
77
c6929e63b0b8 first import
dwinter
parents:
diff changeset
78 Element el = doc.getRootElement();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
79
c6929e63b0b8 first import
dwinter
parents:
diff changeset
80 // get resources
c6929e63b0b8 first import
dwinter
parents:
diff changeset
81 XPath xpathResources = XPath
c6929e63b0b8 first import
dwinter
parents:
diff changeset
82 .newInstance("//rdf:Description[echonavigation:type='" + type
c6929e63b0b8 first import
dwinter
parents:
diff changeset
83 + "']");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
84 xpathResources.addNamespace("MPIWG",
c6929e63b0b8 first import
dwinter
parents:
diff changeset
85 "http://www.mpiwg-berlin.mpg.de/ns/mpiwg");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
86 xpathResources.addNamespace("rdf",
c6929e63b0b8 first import
dwinter
parents:
diff changeset
87 "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
88 xpathResources
c6929e63b0b8 first import
dwinter
parents:
diff changeset
89 .addNamespace("echonavigation", "http://www.echo.eu/rdf#");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
90
c6929e63b0b8 first import
dwinter
parents:
diff changeset
91 List<Element> paths = xpathResources.selectNodes(el);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
92
c6929e63b0b8 first import
dwinter
parents:
diff changeset
93 int counter = 0;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
94 for (Element path : paths) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
95 counter++;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
96 logger.debug("resource counter:" + String.valueOf(counter));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
97 if (logger.getLevel().equals(Level.DEBUG) && (counter > MAX_RES))
c6929e63b0b8 first import
dwinter
parents:
diff changeset
98 break;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
99
c6929e63b0b8 first import
dwinter
parents:
diff changeset
100 ECHOObject obj = getECHORessourceFromRDF(el, path, type);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
101 // fueger dem object seine PID hinzu.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
102 String pid;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
103 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
104 pid = obj.getOrCreatePID();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
105 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
106 // TODO Auto-generated catch block
c6929e63b0b8 first import
dwinter
parents:
diff changeset
107 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
108 pid = null;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
109 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
110 if (pid == null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
111 logger.error("Cannot createOrGetAn a PID for:" + obj.toString());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
112 logger.error("Object will not be added");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
113 } else {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
114 ret.add(obj);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
115 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
116 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
117
c6929e63b0b8 first import
dwinter
parents:
diff changeset
118 return ret;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
119 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
120
c6929e63b0b8 first import
dwinter
parents:
diff changeset
121 private ECHOObject getECHORessourceFromRDF(Element el, Element path,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
122 String echotype) throws JDOMException, IOException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
123
c6929e63b0b8 first import
dwinter
parents:
diff changeset
124 XPath xpath = EScidocTools.getESciDocXpath("./@rdf:about");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
125 Attribute aboutAttr = (Attribute) xpath.selectSingleNode(path);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
126 String aboutString = aboutAttr.getValue();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
127 // hole das object
c6929e63b0b8 first import
dwinter
parents:
diff changeset
128
c6929e63b0b8 first import
dwinter
parents:
diff changeset
129 xpath = EScidocTools.getESciDocXpath(".//echonavigation:name");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
130 String name = ((Element) xpath.selectSingleNode(path)).getTextTrim();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
131
c6929e63b0b8 first import
dwinter
parents:
diff changeset
132 Html2Text htmlParser = new Html2Text(); // filter html codes
c6929e63b0b8 first import
dwinter
parents:
diff changeset
133 htmlParser.parse(new StringReader(name));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
134 name = htmlParser.getText();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
135
c6929e63b0b8 first import
dwinter
parents:
diff changeset
136 xpath = EScidocTools.getESciDocXpath(".//mpiwg:archive-path");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
137 Element archiveElement = (Element) xpath.selectSingleNode(path);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
138 String archivePath = "";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
139 if (archiveElement != null)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
140 archivePath = archiveElement.getTextTrim();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
141
c6929e63b0b8 first import
dwinter
parents:
diff changeset
142 xpath = EScidocTools.getESciDocXpath("@rdf:about");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
143 String about = ((Attribute) xpath.selectSingleNode(path)).getValue();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
144
c6929e63b0b8 first import
dwinter
parents:
diff changeset
145 // hole seq des objectes
c6929e63b0b8 first import
dwinter
parents:
diff changeset
146 String sequenceString = ("//rdf:Seq[@rdf:about='" + about + "']/rdf:li/@rdf:resource");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
147 xpath = EScidocTools.getESciDocXpath(sequenceString);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
148
c6929e63b0b8 first import
dwinter
parents:
diff changeset
149 List<Attribute> seqs = xpath.selectNodes(el);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
150
c6929e63b0b8 first import
dwinter
parents:
diff changeset
151 ECHOObject er = null;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
152 if (echotype.equals("ECHO_resource")) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
153 er = new ECHORessource(name, archivePath, aboutString);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
154 } else if (echotype.equals("ECHO_collection")) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
155 er = new ECHOCollection(name, aboutString);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
156 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
157
c6929e63b0b8 first import
dwinter
parents:
diff changeset
158 // set description
c6929e63b0b8 first import
dwinter
parents:
diff changeset
159 DefaultHttpClient hc = new DefaultHttpClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
160 URI echoUri;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
161 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
162 echoUri = new URI(er.echoUrl + "/getDescription");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
163
c6929e63b0b8 first import
dwinter
parents:
diff changeset
164 HttpGet hg = new HttpGet(echoUri);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
165
c6929e63b0b8 first import
dwinter
parents:
diff changeset
166 HttpResponse resp = hc.execute(hg);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
167 HttpEntity respEnt = resp.getEntity();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
168 if (respEnt != null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
169 // er.description=EScidocBasicHandler.convertStreamToString(respEnt.getContent());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
170 // filter html codes
c6929e63b0b8 first import
dwinter
parents:
diff changeset
171 htmlParser.parse(new InputStreamReader(respEnt.getContent()));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
172 er.description = htmlParser.getText();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
173
c6929e63b0b8 first import
dwinter
parents:
diff changeset
174 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
175 } catch (Exception e1) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
176 logger.debug("echoImporter no URI:" + er.echoUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
177 // e1.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
178 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
179
c6929e63b0b8 first import
dwinter
parents:
diff changeset
180 for (Attribute seq : seqs) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
181 String typeString = ("//rdf:Description[@rdf:about='"
c6929e63b0b8 first import
dwinter
parents:
diff changeset
182 + seq.getValue() + "']/echonavigation:type");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
183 xpath = EScidocTools.getESciDocXpath(typeString);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
184
c6929e63b0b8 first import
dwinter
parents:
diff changeset
185 Element typeNode = (Element) xpath.selectSingleNode(el);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
186 if (typeNode==null){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
187 logger.debug("getRessourceFromRDF, no type in:"+typeString);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
188 continue;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
189 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
190 String type = (typeNode).getTextTrim();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
191 if (ECHORessource.class.isInstance(er)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
192 && type.equals("ECHO_metaData")) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
193
c6929e63b0b8 first import
dwinter
parents:
diff changeset
194 HttpClient client = new DefaultHttpClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
195 HttpGet get = new HttpGet(seq.getValue().replace(
c6929e63b0b8 first import
dwinter
parents:
diff changeset
196 "showMetaDataXML", "getMetaDataLink"));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
197 HttpResponse ret = null;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
198 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
199 ret = client.execute(get);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
200
c6929e63b0b8 first import
dwinter
parents:
diff changeset
201 } catch (IOException e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
202 // TODO Auto-generated catch block
c6929e63b0b8 first import
dwinter
parents:
diff changeset
203 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
204 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
205 int code = ret.getStatusLine().getStatusCode();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
206 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
207 if ((code == 204) || (code >= 300))
c6929e63b0b8 first import
dwinter
parents:
diff changeset
208 ((ECHORessource) er).metaData = "";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
209 else {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
210 String str = EScidocBasicHandler
c6929e63b0b8 first import
dwinter
parents:
diff changeset
211 .convertStreamToString(ret.getEntity()
c6929e63b0b8 first import
dwinter
parents:
diff changeset
212 .getContent());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
213 ((ECHORessource) er).metaData = ((ECHORessource) er)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
214 .correctML(str);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
215 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
216
c6929e63b0b8 first import
dwinter
parents:
diff changeset
217 } catch (IOException e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
218 // TODO Auto-generated catch block
c6929e63b0b8 first import
dwinter
parents:
diff changeset
219 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
220 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
221
c6929e63b0b8 first import
dwinter
parents:
diff changeset
222 } else if (ECHORessource.class.isInstance(er)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
223 && type.equals("ECHO_fulltext")) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
224
c6929e63b0b8 first import
dwinter
parents:
diff changeset
225 HttpClient client = new DefaultHttpClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
226 HttpGet get = new HttpGet(seq.getValue() + "?noredirect=yes");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
227 HttpResponse ret = null;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
228 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
229 ret = client.execute(get);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
230
c6929e63b0b8 first import
dwinter
parents:
diff changeset
231 } catch (IOException e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
232 // TODO Auto-generated catch block
c6929e63b0b8 first import
dwinter
parents:
diff changeset
233 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
234 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
235
c6929e63b0b8 first import
dwinter
parents:
diff changeset
236 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
237 String str = EScidocBasicHandler.convertStreamToString(ret
c6929e63b0b8 first import
dwinter
parents:
diff changeset
238 .getEntity().getContent());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
239 ((ECHORessource) er).fullText = new String(str);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
240 } catch (IOException e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
241 // TODO Auto-generated catch block
c6929e63b0b8 first import
dwinter
parents:
diff changeset
242 e.printStackTrace();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
243 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
244
c6929e63b0b8 first import
dwinter
parents:
diff changeset
245 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
246
c6929e63b0b8 first import
dwinter
parents:
diff changeset
247 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
248
c6929e63b0b8 first import
dwinter
parents:
diff changeset
249 logger.debug(er.toString());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
250 return er;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
251 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
252
c6929e63b0b8 first import
dwinter
parents:
diff changeset
253 /**
c6929e63b0b8 first import
dwinter
parents:
diff changeset
254 * Erzeugt Collections auf der Basis der in den Metadaten gespeicherten ECHO
c6929e63b0b8 first import
dwinter
parents:
diff changeset
255 * urls. Dabei wir der Pfad schrittweise analysiert und dann ein Baum
c6929e63b0b8 first import
dwinter
parents:
diff changeset
256 * aufgebaut.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
257 *
c6929e63b0b8 first import
dwinter
parents:
diff changeset
258 * @param handler
c6929e63b0b8 first import
dwinter
parents:
diff changeset
259 * eScidoc Serververbindung
c6929e63b0b8 first import
dwinter
parents:
diff changeset
260 * @param context
c6929e63b0b8 first import
dwinter
parents:
diff changeset
261 * Context der Kollektion (sollte eine Kollektio sein die aus
c6929e63b0b8 first import
dwinter
parents:
diff changeset
262 * einer ECHO webseite aufgebaut wurde.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
263 * @throws Exception
c6929e63b0b8 first import
dwinter
parents:
diff changeset
264 */
c6929e63b0b8 first import
dwinter
parents:
diff changeset
265 public void organizeRessourcesInCollections(EScidocBasicHandler handler,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
266 String context) throws Exception {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
267 HashMap<String, ArrayList<String>> tree = new HashMap<String, ArrayList<String>>(); // nimmt
c6929e63b0b8 first import
dwinter
parents:
diff changeset
268 // den
c6929e63b0b8 first import
dwinter
parents:
diff changeset
269 // tree
c6929e63b0b8 first import
dwinter
parents:
diff changeset
270 // der
c6929e63b0b8 first import
dwinter
parents:
diff changeset
271 // items
c6929e63b0b8 first import
dwinter
parents:
diff changeset
272 // auf
c6929e63b0b8 first import
dwinter
parents:
diff changeset
273 HashMap<String, String> url2escidocId = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
274
c6929e63b0b8 first import
dwinter
parents:
diff changeset
275 HashMap<String, String> containerUrl2escidocId = new HashMap<String, String>();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
276 HashMap<String, ArrayList<String>> containerTree = new HashMap<String, ArrayList<String>>(); // nimmt
c6929e63b0b8 first import
dwinter
parents:
diff changeset
277 // den
c6929e63b0b8 first import
dwinter
parents:
diff changeset
278 // tree
c6929e63b0b8 first import
dwinter
parents:
diff changeset
279 // der
c6929e63b0b8 first import
dwinter
parents:
diff changeset
280 // container
c6929e63b0b8 first import
dwinter
parents:
diff changeset
281 // auf
c6929e63b0b8 first import
dwinter
parents:
diff changeset
282
c6929e63b0b8 first import
dwinter
parents:
diff changeset
283 ArrayList<String> urls = handler.getAllLinksOfContext("web_page",
c6929e63b0b8 first import
dwinter
parents:
diff changeset
284 context);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
285 generateTreeAndConversion(urls, tree, url2escidocId);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
286
c6929e63b0b8 first import
dwinter
parents:
diff changeset
287 File tt = new File("/tmp/list.out");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
288 FileWriter fw= new FileWriter(tt);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
289 for (String containerUrl : tree.keySet()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
290 fw.write(containerUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
291 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
292 fw.close();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
293
c6929e63b0b8 first import
dwinter
parents:
diff changeset
294 // erzeuge jetzt die container
c6929e63b0b8 first import
dwinter
parents:
diff changeset
295 for (String containerUrl : tree.keySet()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
296 XPath xp;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
297
c6929e63b0b8 first import
dwinter
parents:
diff changeset
298 // erzeuge Document des Container mit dem entsprechenden Kontext und den Metadaten aus dem Context.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
299 Document doc = createContainer(handler, context, url2escidocId,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
300 containerUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
301
c6929e63b0b8 first import
dwinter
parents:
diff changeset
302 if (doc==null){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
303 doc= createContainerFromECHO(handler, containerUrl, context);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
304 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
305
c6929e63b0b8 first import
dwinter
parents:
diff changeset
306 // now fill the container
c6929e63b0b8 first import
dwinter
parents:
diff changeset
307
c6929e63b0b8 first import
dwinter
parents:
diff changeset
308 xp = EScidocTools.getESciDocXpath("//struct-map:struct-map");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
309 Element structmap = (Element) xp.selectSingleNode(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
310
c6929e63b0b8 first import
dwinter
parents:
diff changeset
311 // fuege die Collection selbst in den container
c6929e63b0b8 first import
dwinter
parents:
diff changeset
312 putContentInStructMap(structmap, url2escidocId.get(containerUrl));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
313
c6929e63b0b8 first import
dwinter
parents:
diff changeset
314
c6929e63b0b8 first import
dwinter
parents:
diff changeset
315 //fuege nun nur die ressourcen hinzu
c6929e63b0b8 first import
dwinter
parents:
diff changeset
316 for (String content : tree.get(containerUrl)) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
317 if (!contentIsCollection(handler,content))
c6929e63b0b8 first import
dwinter
parents:
diff changeset
318 putContentInStructMap(structmap, content);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
319 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
320 logger.debug(printXML(doc));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
321
c6929e63b0b8 first import
dwinter
parents:
diff changeset
322 try {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
323
c6929e63b0b8 first import
dwinter
parents:
diff changeset
324 // rrzeuge das object jetzt in escidoc
c6929e63b0b8 first import
dwinter
parents:
diff changeset
325 String result = handler.createObject("/ir/container",
c6929e63b0b8 first import
dwinter
parents:
diff changeset
326 printXML(doc));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
327 xp = EScidocTools
c6929e63b0b8 first import
dwinter
parents:
diff changeset
328 .getESciDocXpath("//container:container/@xlink:href");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
329 Document containerDoc = new SAXBuilder().build(EScidocBasicHandler
c6929e63b0b8 first import
dwinter
parents:
diff changeset
330 .convertStringToStream(result));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
331 Attribute containerHref = (Attribute) xp.selectSingleNode(containerDoc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
332 logger.debug("added container:" + containerHref);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
333 Logger.getLogger("addedFilesLogger").debug(
c6929e63b0b8 first import
dwinter
parents:
diff changeset
334 "added container:" + containerHref);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
335
c6929e63b0b8 first import
dwinter
parents:
diff changeset
336 // sichere jetzt den neuen container im container tree
c6929e63b0b8 first import
dwinter
parents:
diff changeset
337 String[] splitted = containerUrl.split("/"); // teile dazu die
c6929e63b0b8 first import
dwinter
parents:
diff changeset
338 // container url
c6929e63b0b8 first import
dwinter
parents:
diff changeset
339 // wieder auf.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
340 StringBuffer buffer = new StringBuffer();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
341 for (int i = 0; i < splitted.length - 2; i++) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
342 buffer.append(splitted[i]);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
343 buffer.append("/");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
344
c6929e63b0b8 first import
dwinter
parents:
diff changeset
345 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
346 buffer.append(splitted[splitted.length - 2]);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
347
c6929e63b0b8 first import
dwinter
parents:
diff changeset
348 String parentContainer = buffer.toString();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
349
c6929e63b0b8 first import
dwinter
parents:
diff changeset
350 if (!containerTree.containsKey(parentContainer)) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
351 containerTree.put(parentContainer, new ArrayList<String>());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
352 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
353 containerTree.get(parentContainer).add(containerHref.getValue());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
354
c6929e63b0b8 first import
dwinter
parents:
diff changeset
355 containerUrl2escidocId.put(containerUrl, containerHref.getValue());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
356
c6929e63b0b8 first import
dwinter
parents:
diff changeset
357 } catch (Exception e) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
358 Logger.getLogger("notAddedFilesLogger").debug(
c6929e63b0b8 first import
dwinter
parents:
diff changeset
359 "notadded container:" + containerUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
360 logger.debug("notadded container:" + containerUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
361 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
362
c6929e63b0b8 first import
dwinter
parents:
diff changeset
363 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
364 addContainer(handler, containerTree, containerUrl2escidocId, context); // add
c6929e63b0b8 first import
dwinter
parents:
diff changeset
365 // the
c6929e63b0b8 first import
dwinter
parents:
diff changeset
366 // container
c6929e63b0b8 first import
dwinter
parents:
diff changeset
367 // to
c6929e63b0b8 first import
dwinter
parents:
diff changeset
368 // the
c6929e63b0b8 first import
dwinter
parents:
diff changeset
369 // struct
c6929e63b0b8 first import
dwinter
parents:
diff changeset
370 // maps
c6929e63b0b8 first import
dwinter
parents:
diff changeset
371 // of
c6929e63b0b8 first import
dwinter
parents:
diff changeset
372 // the
c6929e63b0b8 first import
dwinter
parents:
diff changeset
373 // parents
c6929e63b0b8 first import
dwinter
parents:
diff changeset
374 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
375
c6929e63b0b8 first import
dwinter
parents:
diff changeset
376 /** Teste ob sich hinter content eine ressource oder eine collection versteckt
c6929e63b0b8 first import
dwinter
parents:
diff changeset
377 * @param content, (escidocid,echourl) des content
c6929e63b0b8 first import
dwinter
parents:
diff changeset
378 * @return
c6929e63b0b8 first import
dwinter
parents:
diff changeset
379 * @throws IOException
c6929e63b0b8 first import
dwinter
parents:
diff changeset
380 * @throws JDOMException
c6929e63b0b8 first import
dwinter
parents:
diff changeset
381 */
c6929e63b0b8 first import
dwinter
parents:
diff changeset
382 private boolean contentIsCollection(EScidocBasicHandler handler, String content) throws IOException, JDOMException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
383 String url = content.split(",")[0];
c6929e63b0b8 first import
dwinter
parents:
diff changeset
384 HttpResponse result = handler.eScidocGet(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
385 InputStream xml = result.getEntity().getContent();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
386 String cmm = EScidocBasicHandler.getContentModel(xml);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
387
c6929e63b0b8 first import
dwinter
parents:
diff changeset
388 return cmm.equals(collectionCMM);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
389 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
390
c6929e63b0b8 first import
dwinter
parents:
diff changeset
391 /** FŸge einen content in die struct-map
c6929e63b0b8 first import
dwinter
parents:
diff changeset
392 * @param structmap
c6929e63b0b8 first import
dwinter
parents:
diff changeset
393 * @param content, (escidocID,url) der Ressource
c6929e63b0b8 first import
dwinter
parents:
diff changeset
394 */
c6929e63b0b8 first import
dwinter
parents:
diff changeset
395 public void putContentInStructMap(Element structmap, String content) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
396 if (content==null) // existiert nicht
c6929e63b0b8 first import
dwinter
parents:
diff changeset
397 return;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
398
c6929e63b0b8 first import
dwinter
parents:
diff changeset
399 String[] urlSplit = content.split(","); // urls von get all
c6929e63b0b8 first import
dwinter
parents:
diff changeset
400 // links haben immer die
c6929e63b0b8 first import
dwinter
parents:
diff changeset
401 // form escidoc:1,url
c6929e63b0b8 first import
dwinter
parents:
diff changeset
402 String newItemUrl = urlSplit[0];
c6929e63b0b8 first import
dwinter
parents:
diff changeset
403
c6929e63b0b8 first import
dwinter
parents:
diff changeset
404 Element newItem = new Element("item", EScidocTools.srel);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
405
c6929e63b0b8 first import
dwinter
parents:
diff changeset
406 Namespace ns = Namespace.getNamespace("xlink",
c6929e63b0b8 first import
dwinter
parents:
diff changeset
407 EScidocTools.xlink);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
408 newItem.setAttribute("href", newItemUrl, ns);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
409 structmap.addContent(newItem);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
410 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
411
c6929e63b0b8 first import
dwinter
parents:
diff changeset
412 /** Erzeuge eine Container
c6929e63b0b8 first import
dwinter
parents:
diff changeset
413 * @param handler Context des Containers
c6929e63b0b8 first import
dwinter
parents:
diff changeset
414 * @param url2escidocId Liste mit url -> escidocId Zurordnungen
c6929e63b0b8 first import
dwinter
parents:
diff changeset
415 * @param collectionURL, echo url der collection zu der der Container erzeugt werden soll
c6929e63b0b8 first import
dwinter
parents:
diff changeset
416 * @return
c6929e63b0b8 first import
dwinter
parents:
diff changeset
417 * @throws JDOMException
c6929e63b0b8 first import
dwinter
parents:
diff changeset
418 * @throws IOException
c6929e63b0b8 first import
dwinter
parents:
diff changeset
419 * @throws ClientProtocolException
c6929e63b0b8 first import
dwinter
parents:
diff changeset
420 */
c6929e63b0b8 first import
dwinter
parents:
diff changeset
421 public Document createContainer(EScidocBasicHandler handler,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
422 String context, HashMap<String, String> url2escidocId,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
423 String collectionURL) throws JDOMException, IOException,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
424 ClientProtocolException {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
425
c6929e63b0b8 first import
dwinter
parents:
diff changeset
426 InputStream is = getClass()
c6929e63b0b8 first import
dwinter
parents:
diff changeset
427 .getResourceAsStream(
c6929e63b0b8 first import
dwinter
parents:
diff changeset
428 "/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollection_container.xml");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
429 Document doc = new SAXBuilder().build(is);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
430
c6929e63b0b8 first import
dwinter
parents:
diff changeset
431 XPath xp = EScidocTools
c6929e63b0b8 first import
dwinter
parents:
diff changeset
432 .getESciDocXpath("//srel:context/@xlink:href");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
433 Attribute href = (Attribute) xp.selectSingleNode(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
434 href.setValue(context);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
435
c6929e63b0b8 first import
dwinter
parents:
diff changeset
436 xp = EScidocTools
c6929e63b0b8 first import
dwinter
parents:
diff changeset
437 .getESciDocXpath("//srel:content-model/@xlink:href");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
438 href = (Attribute) xp.selectSingleNode(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
439 href.setValue(collectionCMM); // TODO mache das
c6929e63b0b8 first import
dwinter
parents:
diff changeset
440 // konfigurierbar,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
441 // nimm z.z.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
442 // echocollection
c6929e63b0b8 first import
dwinter
parents:
diff changeset
443 // modell
c6929e63b0b8 first import
dwinter
parents:
diff changeset
444 String cmd = url2escidocId.get(collectionURL); // ensprechende collection existiert nicht.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
445 if (cmd==null){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
446 return null;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
447 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
448 InputStream in = handler
c6929e63b0b8 first import
dwinter
parents:
diff changeset
449 .eScidocGet(cmd).getEntity()
c6929e63b0b8 first import
dwinter
parents:
diff changeset
450 .getContent();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
451 Document ecDoc = new SAXBuilder().build(in);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
452
c6929e63b0b8 first import
dwinter
parents:
diff changeset
453
c6929e63b0b8 first import
dwinter
parents:
diff changeset
454 // copy description from collection to container
c6929e63b0b8 first import
dwinter
parents:
diff changeset
455 xp = EScidocTools.getESciDocXpath("/escidocItem:item//metadata-records:md-record[@name='escidoc']//dc:title");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
456
c6929e63b0b8 first import
dwinter
parents:
diff changeset
457
c6929e63b0b8 first import
dwinter
parents:
diff changeset
458 Element item = (Element) xp.selectSingleNode(ecDoc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
459 String title = "anon";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
460 if (item != null)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
461 title = item.getTextTrim();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
462
c6929e63b0b8 first import
dwinter
parents:
diff changeset
463 xp = EScidocTools.getESciDocXpath("/container:container//metadata-records:md-record[@name='escidoc']//dc:title");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
464 item = (Element) xp.selectSingleNode(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
465
c6929e63b0b8 first import
dwinter
parents:
diff changeset
466 item.setText(title);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
467
c6929e63b0b8 first import
dwinter
parents:
diff changeset
468 xp = EScidocTools.getESciDocXpath("/escidocItem:item//metadata-records:md-record[@name='escidoc']//dc:description");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
469 item = (Element) xp.selectSingleNode(ecDoc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
470 String description;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
471 if (item != null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
472 description = item.getTextTrim();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
473 xp = EScidocTools.getESciDocXpath("/container:container//metadata-records:md-record[@name='escidoc']//dc:description");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
474 item = (Element) xp.selectSingleNode(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
475 item.setText(description);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
476 } else {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
477 // get description from ECHO
c6929e63b0b8 first import
dwinter
parents:
diff changeset
478 XPath url = EScidocTools
c6929e63b0b8 first import
dwinter
parents:
diff changeset
479 .getESciDocXpath(".//escidocComponents:component[escidocComponents:properties/prop:content-category[text()='web_page']]/escidocComponents:content/@xlink:href");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
480 Attribute hrefECHO = (Attribute) url.selectSingleNode(ecDoc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
481 if (hrefECHO != null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
482 DefaultHttpClient hc = new DefaultHttpClient();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
483 HttpGet hg = new HttpGet(hrefECHO.getValue()
c6929e63b0b8 first import
dwinter
parents:
diff changeset
484 + "/getDescription");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
485 HttpResponse resp = hc.execute(hg);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
486 HttpEntity respEnt = resp.getEntity();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
487 if (respEnt != null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
488 item = (Element) xp.selectSingleNode(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
489 item.setText(EScidocBasicHandler
c6929e63b0b8 first import
dwinter
parents:
diff changeset
490 .convertStreamToString(respEnt.getContent()));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
491 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
492
c6929e63b0b8 first import
dwinter
parents:
diff changeset
493 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
494 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
495 return doc;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
496 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
497
c6929e63b0b8 first import
dwinter
parents:
diff changeset
498 /**
c6929e63b0b8 first import
dwinter
parents:
diff changeset
499 * Erzeugt aus einer Liste von urls der Form escidocID,url einen hierarchischen Tree, sowie jeweils eine Zuordnung der url zur escidocid
c6929e63b0b8 first import
dwinter
parents:
diff changeset
500 * @param urls, liste der urls der Form "escidocID,url"
c6929e63b0b8 first import
dwinter
parents:
diff changeset
501 * @param tree, hier wird der Tree rein geschrieben, sollte ein leerer HashMap sein
c6929e63b0b8 first import
dwinter
parents:
diff changeset
502 * @param url2escidocId, hier wird die Zuordnung, url -> escidocID abgespeichert
c6929e63b0b8 first import
dwinter
parents:
diff changeset
503 */
c6929e63b0b8 first import
dwinter
parents:
diff changeset
504 public void generateTreeAndConversion(ArrayList<String> urls,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
505 HashMap<String, ArrayList<String>> tree,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
506 HashMap<String, String> url2escidocId) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
507 for (String url : urls) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
508 // teile die url auf die url ohne den letzten teil ist die url der
c6929e63b0b8 first import
dwinter
parents:
diff changeset
509 // collection
c6929e63b0b8 first import
dwinter
parents:
diff changeset
510 String[] splitted = url.split("/");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
511 url2escidocId.put(url.split(",")[1],
c6929e63b0b8 first import
dwinter
parents:
diff changeset
512 url.split(",")[0]);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
513
c6929e63b0b8 first import
dwinter
parents:
diff changeset
514
c6929e63b0b8 first import
dwinter
parents:
diff changeset
515 if (splitted.length > 1) // pfad is lang genug
c6929e63b0b8 first import
dwinter
parents:
diff changeset
516 {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
517 StringBuffer buffer = new StringBuffer();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
518 for (int i = 0; i < splitted.length - 2; i++) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
519 buffer.append(splitted[i]);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
520 buffer.append("/");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
521
c6929e63b0b8 first import
dwinter
parents:
diff changeset
522 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
523 buffer.append(splitted[splitted.length - 2]);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
524
c6929e63b0b8 first import
dwinter
parents:
diff changeset
525 String collection = buffer.toString();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
526 String collectionUrl = collection.split(",")[1]; // nur die url
c6929e63b0b8 first import
dwinter
parents:
diff changeset
527 // nicht den
c6929e63b0b8 first import
dwinter
parents:
diff changeset
528 // escidoc-anteil.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
529
c6929e63b0b8 first import
dwinter
parents:
diff changeset
530 if (!tree.containsKey(collectionUrl)) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
531 tree.put(collectionUrl, new ArrayList<String>());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
532 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
533 tree.get(collectionUrl).add(url);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
534
c6929e63b0b8 first import
dwinter
parents:
diff changeset
535 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
536 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
537 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
538
c6929e63b0b8 first import
dwinter
parents:
diff changeset
539 private void addContainer(EScidocBasicHandler handler,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
540 HashMap<String, ArrayList<String>> containerTree,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
541 HashMap<String, String> containerUrl2escidocId, String context)
c6929e63b0b8 first import
dwinter
parents:
diff changeset
542 throws Exception {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
543 for (String containerUrl : containerTree.keySet()) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
544 String escidocId = containerUrl2escidocId.get(containerUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
545 // if (escidocId == null) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
546 //
c6929e63b0b8 first import
dwinter
parents:
diff changeset
547 // // TODO: some containers have no ECHOcollection or
c6929e63b0b8 first import
dwinter
parents:
diff changeset
548 // // ECHO_ressourceif this is the case create it here
c6929e63b0b8 first import
dwinter
parents:
diff changeset
549 // escidocId = createContainerFromECHO(handler, containerUrl,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
550 // context);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
551 // logger.debug("container not in containerUrl2escidoc:"
c6929e63b0b8 first import
dwinter
parents:
diff changeset
552 // + containerUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
553 // }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
554 if (escidocId==null){
c6929e63b0b8 first import
dwinter
parents:
diff changeset
555 logger.debug("addContainer problem not in containerUrl2escidocId:"+containerUrl);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
556 Document doc = createContainerFromECHO(handler, containerUrl, context);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
557 String res = handler.createObject("/ir/container",printXML(doc));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
558 escidocId = "/ir/container/"+EScidocBasicHandler.getId(res);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
559 //return "/ir/container/"+EScidocBasicHandler.getId(res);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
560
c6929e63b0b8 first import
dwinter
parents:
diff changeset
561 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
562 HttpResponse result = handler.eScidocGet(escidocId);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
563 String obj = EScidocBasicHandler.convertStreamToString(result
c6929e63b0b8 first import
dwinter
parents:
diff changeset
564 .getEntity().getContent());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
565 String datestamp = EScidocBasicHandler.getDateStamp(obj);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
566 String body = String.format(
c6929e63b0b8 first import
dwinter
parents:
diff changeset
567 "<param last-modification-date=\"%s\">", datestamp);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
568
c6929e63b0b8 first import
dwinter
parents:
diff changeset
569 // fuege jetzt die id aller sub container ein
c6929e63b0b8 first import
dwinter
parents:
diff changeset
570 for (String content : containerTree.get(containerUrl)) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
571 String[] tmp = content.split("/");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
572 String addID = tmp[tmp.length - 1];
c6929e63b0b8 first import
dwinter
parents:
diff changeset
573 body += String.format("<id>%s</id>", addID);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
574 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
575 body += "</param>";
c6929e63b0b8 first import
dwinter
parents:
diff changeset
576 result = handler.eScidocPost(escidocId + "/members/add",
c6929e63b0b8 first import
dwinter
parents:
diff changeset
577 EScidocBasicHandler.convertStringToStream(body));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
578 String retText = EScidocBasicHandler.convertStreamToString(result
c6929e63b0b8 first import
dwinter
parents:
diff changeset
579 .getEntity().getContent());
c6929e63b0b8 first import
dwinter
parents:
diff changeset
580 logger.debug("adding result:" + retText);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
581 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
582
c6929e63b0b8 first import
dwinter
parents:
diff changeset
583 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
584
c6929e63b0b8 first import
dwinter
parents:
diff changeset
585 /**
c6929e63b0b8 first import
dwinter
parents:
diff changeset
586 * Erzeuge einen container aus echo daten
c6929e63b0b8 first import
dwinter
parents:
diff changeset
587 * @param handler
c6929e63b0b8 first import
dwinter
parents:
diff changeset
588 * @param url
c6929e63b0b8 first import
dwinter
parents:
diff changeset
589 * @param context
c6929e63b0b8 first import
dwinter
parents:
diff changeset
590 * @return
c6929e63b0b8 first import
dwinter
parents:
diff changeset
591 * @throws Exception
c6929e63b0b8 first import
dwinter
parents:
diff changeset
592 */
c6929e63b0b8 first import
dwinter
parents:
diff changeset
593 private Document createContainerFromECHO(EScidocBasicHandler handler,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
594 String url, String context) throws Exception {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
595 InputStream is = getClass()
c6929e63b0b8 first import
dwinter
parents:
diff changeset
596 .getResourceAsStream(
c6929e63b0b8 first import
dwinter
parents:
diff changeset
597 "/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollection_container.xml");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
598 Document doc = new SAXBuilder().build(is);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
599
c6929e63b0b8 first import
dwinter
parents:
diff changeset
600 XPath xp = EScidocTools.getESciDocXpath("//srel:context/@xlink:href");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
601 Attribute href = (Attribute) xp.selectSingleNode(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
602 href.setValue(context);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
603
c6929e63b0b8 first import
dwinter
parents:
diff changeset
604 xp = EScidocTools.getESciDocXpath("//srel:content-model/@xlink:href");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
605 href = (Attribute) xp.selectSingleNode(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
606 href.setValue(collectionCMM); // TODO mache das
c6929e63b0b8 first import
dwinter
parents:
diff changeset
607 // konfigurierbar,
c6929e63b0b8 first import
dwinter
parents:
diff changeset
608 // nimm z.z.
c6929e63b0b8 first import
dwinter
parents:
diff changeset
609 // echocollection
c6929e63b0b8 first import
dwinter
parents:
diff changeset
610 // modell
c6929e63b0b8 first import
dwinter
parents:
diff changeset
611
c6929e63b0b8 first import
dwinter
parents:
diff changeset
612
c6929e63b0b8 first import
dwinter
parents:
diff changeset
613 xp = EScidocTools.getESciDocXpath("//dc:title");
c6929e63b0b8 first import
dwinter
parents:
diff changeset
614
c6929e63b0b8 first import
dwinter
parents:
diff changeset
615
c6929e63b0b8 first import
dwinter
parents:
diff changeset
616 String title = url;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
617
c6929e63b0b8 first import
dwinter
parents:
diff changeset
618
c6929e63b0b8 first import
dwinter
parents:
diff changeset
619 Element item = (Element) xp.selectSingleNode(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
620
c6929e63b0b8 first import
dwinter
parents:
diff changeset
621 item.setText(title);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
622
c6929e63b0b8 first import
dwinter
parents:
diff changeset
623 //String res = handler.createObject("/ir/container",
c6929e63b0b8 first import
dwinter
parents:
diff changeset
624 // printXML(doc));
c6929e63b0b8 first import
dwinter
parents:
diff changeset
625
c6929e63b0b8 first import
dwinter
parents:
diff changeset
626 //return "/ir/container/"+EScidocBasicHandler.getId(res);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
627
c6929e63b0b8 first import
dwinter
parents:
diff changeset
628 return doc;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
629 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
630
c6929e63b0b8 first import
dwinter
parents:
diff changeset
631 private String printXML(Document doc) {
c6929e63b0b8 first import
dwinter
parents:
diff changeset
632 XMLOutputter out = new XMLOutputter();
c6929e63b0b8 first import
dwinter
parents:
diff changeset
633
c6929e63b0b8 first import
dwinter
parents:
diff changeset
634 String string = out.outputString(doc);
c6929e63b0b8 first import
dwinter
parents:
diff changeset
635 return string;
c6929e63b0b8 first import
dwinter
parents:
diff changeset
636
c6929e63b0b8 first import
dwinter
parents:
diff changeset
637 }
c6929e63b0b8 first import
dwinter
parents:
diff changeset
638 }