8
|
1 package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.io.FileWriter;
|
|
5 import java.io.IOException;
|
|
6 import java.net.URLEncoder;
|
|
7 import java.util.HashSet;
|
|
8 import java.util.List;
|
|
9 import java.util.Set;
|
|
10 import java.util.regex.Matcher;
|
|
11 import java.util.regex.Pattern;
|
|
12
|
|
13 import javax.xml.parsers.SAXParser;
|
|
14
|
|
15 import org.apache.http.HttpEntity;
|
|
16 import org.apache.http.HttpResponse;
|
|
17 import org.apache.http.client.ClientProtocolException;
|
|
18 import org.apache.http.client.methods.HttpGet;
|
|
19 import org.apache.http.impl.client.DefaultHttpClient;
|
|
20 import org.apache.log4j.BasicConfigurator;
|
|
21 import org.apache.log4j.Level;
|
|
22 import org.apache.log4j.Logger;
|
|
23 import org.jdom.Document;
|
|
24 import org.jdom.Element;
|
|
25 import org.jdom.JDOMException;
|
|
26 import org.jdom.input.SAXBuilder;
|
|
27 import org.jdom.xpath.XPath;
|
|
28
|
|
29 import sun.util.logging.resources.logging;
|
|
30
|
|
31 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
|
|
32 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
|
|
33 import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
|
|
34 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
|
|
35
|
|
36 /**
|
|
37 * Diese Klasse sucht aus den Pfaden im Pubman Eintrag der URL zu den Quellen den Identifier heraus und findet
|
|
38 * den zugehörigen Eintrag in den von der Foxridge in eScidoc geharvesten Metadaten.
|
|
39 *
|
|
40 * Daraus wird dann ein Container erstellt.
|
|
41 * @author dwinter
|
|
42 *
|
|
43 */
|
|
44 public class PubmanFoxridgeIdentifierRelationModell {
|
|
45
|
|
46 Logger logger = Logger.getRootLogger();
|
|
47 public PubmanFoxridgeIdentifierRelationModell(){
|
|
48
|
|
49 }
|
|
50
|
|
51 public static void main(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{
|
|
52 String dateString="2011-06-08T11:48:50.287Z";
|
|
53
|
|
54 String itemString="/ir/item/escidoc:162177";
|
|
55
|
|
56 String escidocServer="escidoc-test.mpiwg-berlin.mpg.de";
|
15
|
57 EScidocBasicHandler hd = new EScidocBasicHandler(escidocServer, 8080, "dwinter", "XXX");
|
8
|
58
|
|
59 if (args.length>0){
|
|
60 dateString=args[0];
|
|
61 }
|
|
62
|
|
63 if (!hd.isCurrent(itemString,dateString)){
|
|
64 System.err.println("not the last version!");
|
|
65 System.exit(0);
|
|
66 }
|
|
67
|
|
68
|
|
69 List<String> existingRelations = hd.getIdsOfRelationFromObject(itemString,"http://www.escidoc.de/ontologies/mpdl-ontologies/content-relations#hasMember");
|
|
70
|
|
71 String contextMembers="http://escidoc.mpiwg-berlin.mpg.de:8080/ir/context/escidoc:55281/resources/members";
|
|
72
|
|
73
|
|
74
|
|
75 BasicConfigurator.configure();
|
|
76 Logger.getRootLogger().setLevel(Level.ERROR);
|
|
77
|
|
78 PubmanFoxridgeIdentifierRelationModell pi = new PubmanFoxridgeIdentifierRelationModell();
|
|
79
|
|
80 Set<String> identifiers = pi.getIdentifiersFromPubmanPath(contextMembers);
|
|
81
|
|
82 Set <String> ids = new HashSet<String>();
|
|
83 int counter=0;
|
|
84 for (String id: identifiers){
|
|
85
|
|
86
|
|
87 String command=String.format("/ir/items?maximumRecords=1&operation=searchRetrieve&version=1.1&query=%%22%%2Fmd-records%%2Fmd-record%%2Fadmin%%2Fidentifier%%22%%3D%%22%s%%22", id);
|
|
88 //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:1001%22";
|
|
89 //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:40001%22";
|
|
90 //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:12001%22";
|
|
91 System.out.println(command);
|
|
92 List<eSciDocXmlObject> objects = hd.getObjectsFromFilterResult(command, "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item");
|
|
93 System.out.println("found");
|
|
94 for (eSciDocXmlObject obj:objects){
|
|
95 System.out.println("adding:");
|
|
96
|
|
97 String addObjId = obj.getESciDocId();
|
|
98 if(existingRelations.contains(addObjId)){
|
|
99 System.out.println("already in relations:"+addObjId);
|
|
100 } else {
|
|
101
|
|
102 ids.add(addObjId.replace("/ir/item/", ""));
|
|
103 counter+=1;
|
|
104 System.out.println("adding:"+obj.getESciDocId());
|
|
105 }
|
|
106 }
|
|
107 //if (counter>10)
|
|
108 // break;
|
|
109
|
|
110 }
|
|
111 String addMemberXML="<param last-modification-date=\""+dateString+"\">";
|
|
112 for(String id: ids){
|
|
113 addMemberXML+="<relation>"+
|
|
114 "<targetId>"+id+"</targetId>"+
|
|
115 "<predicate>http://www.escidoc.de/ontologies/mpdl-ontologies/content-relations#hasMember</predicate>" +
|
|
116 " </relation>";
|
|
117
|
|
118 }
|
|
119 addMemberXML+="</param>";
|
|
120
|
|
121 System.out.println("addmemberXML:"+addMemberXML);
|
|
122 File addFile = new File("/tmp/add.txt");
|
|
123 FileWriter fw = new FileWriter(addFile);
|
|
124 fw.write(addMemberXML);
|
|
125 fw.close();
|
|
126
|
|
127 //HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161163/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
|
|
128 //HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161164/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
|
|
129 HttpResponse res = hd.eScidocPost(itemString+"/content-relations/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
|
|
130 System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent()));
|
|
131 }
|
|
132
|
|
133 private Set<String> getIdentifiersFromPubmanPath(String contextMembers) {
|
|
134 HttpGet get = new HttpGet(contextMembers);
|
|
135 DefaultHttpClient httpclient = new DefaultHttpClient();
|
|
136 Set<String> retSet = new HashSet<String>();
|
|
137 HttpResponse response;
|
|
138 try {
|
|
139 response = httpclient.execute(get);
|
|
140 } catch (ClientProtocolException e1) {
|
|
141 // TODO Auto-generated catch block
|
|
142 e1.printStackTrace();
|
|
143 return null;
|
|
144 } catch (IOException e1) {
|
|
145 // TODO Auto-generated catch block
|
|
146 e1.printStackTrace();
|
|
147 return null;
|
|
148 }
|
|
149 if (response.getStatusLine().getStatusCode()>200){
|
|
150 logger.error(contextMembers);
|
|
151 logger.error(response.getStatusLine().getReasonPhrase());
|
|
152 return null;
|
|
153 }
|
|
154
|
|
155 HttpEntity ent = response.getEntity();
|
|
156
|
|
157 SAXBuilder builder = new SAXBuilder();
|
|
158 Document doc;
|
|
159 try {
|
|
160 doc = builder.build(ent.getContent());
|
|
161 } catch (IllegalStateException e) {
|
|
162 // TODO Auto-generated catch block
|
|
163 e.printStackTrace();
|
|
164 return null;
|
|
165 } catch (JDOMException e) {
|
|
166 // TODO Auto-generated catch block
|
|
167 e.printStackTrace();
|
|
168 return null;
|
|
169 } catch (IOException e) {
|
|
170 // TODO Auto-generated catch block
|
|
171 e.printStackTrace();
|
|
172 return null;
|
|
173 }
|
|
174 XPath xpath=null;
|
|
175 try {
|
|
176 String xpathString="/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item/metadataRecords:md-records/metadataRecords:md-record/publication:publication/dc:identifier";
|
|
177 xpath = EScidocTools.getESciDocXpath(xpathString);
|
|
178 } catch (JDOMException e) {
|
|
179 // TODO Auto-generated catch block
|
|
180 e.printStackTrace();
|
|
181 return null;
|
|
182 }
|
|
183 List<Element> nodes;
|
|
184 try {
|
|
185 nodes= xpath.selectNodes(doc);
|
|
186 } catch (JDOMException e) {
|
|
187 // TODO Auto-generated catch block
|
|
188 e.printStackTrace();
|
|
189 return null;
|
|
190 }
|
|
191 File outAdd = new File("/tmp/outadded.txt");
|
|
192 try {
|
|
193 FileWriter fw = new FileWriter(outAdd);
|
|
194 for (Element el: nodes){
|
|
195 String text = el.getTextTrim();
|
|
196 Pattern x = Pattern.compile("/library/([^/]*)");
|
|
197 logger.debug("found:"+text);
|
|
198 Matcher m = x.matcher(text);
|
|
199 boolean matched = m.find();
|
|
200 if (matched){
|
|
201 String id = m.group(1);
|
|
202 logger.debug("adding:"+id);
|
|
203 retSet.add(id);
|
|
204 fw.write(id+"\n");
|
|
205 }
|
|
206
|
|
207 }
|
|
208 fw.close();
|
|
209 } catch (IOException e) {
|
|
210 // TODO Auto-generated catch block
|
|
211 e.printStackTrace();
|
|
212 System.exit(0);
|
|
213 }
|
|
214 return retSet;
|
|
215 }
|
|
216 }
|