19
|
1 package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;
|
|
2
|
|
// Iterates over all entries and checks them for person ids.
// Where necessary the ids are added (call the Python tool "addconeids" for this).
|
|
6 import java.io.BufferedReader;
|
|
7 import java.io.BufferedWriter;
|
|
8 import java.io.File;
|
21
|
9 import java.io.FileInputStream;
|
19
|
10 import java.io.FileNotFoundException;
|
|
11 import java.io.FileOutputStream;
|
|
12 import java.io.FileReader;
|
|
13 import java.io.FileWriter;
|
|
14 import java.io.IOException;
|
21
|
15 import java.io.InputStreamReader;
|
19
|
16 import java.io.OutputStreamWriter;
|
|
17 import java.io.UnsupportedEncodingException;
|
|
18 import java.util.ArrayList;
|
|
19 import java.util.HashMap;
|
|
20 import java.util.List;
|
|
21
|
|
22 import org.apache.http.HttpEntity;
|
|
23 import org.apache.http.HttpResponse;
|
|
24 import org.apache.log4j.Level;
|
|
25 import org.apache.log4j.Logger;
|
|
26 import org.jdom.Document;
|
|
27 import org.jdom.Element;
|
|
28 import org.jdom.JDOMException;
|
|
29 import org.jdom.Namespace;
|
|
30 import org.jdom.xpath.XPath;
|
|
31
|
|
32
|
|
33 import com.sun.xml.internal.xsom.impl.scd.Iterators.Map;
|
|
34
|
|
35 import sun.security.krb5.internal.crypto.Nonce;
|
|
36
|
|
37 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
|
|
38 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
|
|
39 import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
|
|
40 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
|
|
41
|
|
42
|
|
43
|
|
44
|
|
45 public class AddConeIds {
|
|
46
|
|
47 /**
|
|
48 * @param connector
|
|
49 * @param args
|
|
50 * @throws ESciDocXmlObjectException
|
|
51 * @throws JDOMException
|
|
52 * @throws IOException
|
|
53 * @throws IllegalStateException
|
|
54 */
|
|
55
|
|
56
|
|
57 public void findPersons(eSciDocXmlObject obj,BufferedWriter out,HashMap<String,List<List<String>>>newTerms, EScidocBasicHandler connector) throws JDOMException, IOException, IllegalStateException, ESciDocXmlObjectException{
|
|
58
|
|
59
|
|
60 Document doc = obj.getDocument();
|
|
61 Boolean changed=false;
|
|
62 XPath personXPath = EScidocTools.getESciDocXpath("//person:person");
|
|
63 @SuppressWarnings("unchecked")
|
|
64 List<Element> persons = personXPath.selectNodes(doc);
|
|
65
|
|
66 XPath familyNameXpath = EScidocTools.getESciDocXpath(".//eterms:family-name");
|
|
67 XPath givenNameXpath = EScidocTools.getESciDocXpath(".//eterms:given-name");
|
|
68 XPath completeNameXpath = EScidocTools.getESciDocXpath(".//eterms:complete-name");
|
|
69
|
|
70 XPath identifierXpath = EScidocTools.getESciDocXpath("./dc:identifier");
|
|
71
|
|
72
|
|
73 for (Element person : persons) {
|
|
74
|
|
75 String famName="";
|
|
76
|
|
77 Element fam = (Element)familyNameXpath.selectSingleNode(person);
|
|
78
|
|
79 if (fam != null){
|
|
80 famName = fam.getTextTrim();
|
|
81 }
|
|
82
|
|
83 Element giv = (Element)givenNameXpath.selectSingleNode(person);
|
|
84
|
|
85 String givName ="";
|
|
86 if (fam != null){
|
|
87 givName = giv.getTextTrim();
|
|
88 }
|
|
89
|
|
90 Element compl = (Element)completeNameXpath.selectSingleNode(person);
|
|
91
|
|
92 String complName="";
|
|
93 if (fam != null){
|
|
94 complName = compl.getTextTrim();
|
|
95 }
|
|
96
|
|
97
|
|
98 List<Element> ids = (List<Element>)identifierXpath.selectNodes(person);
|
|
99
|
|
100
|
|
101 List<String>identifiers = new ArrayList<String> ();
|
|
102 for (Element id : ids){
|
|
103 identifiers.add(id.getTextTrim());
|
|
104
|
|
105 }
|
|
106
|
|
107 for (String identifier : identifiers){
|
|
108 String outStr = String.format("%s,%s,%s,%s,%s\n",obj.getESciDocId(),famName,givName,complName,identifier);
|
|
109 out.write(outStr);
|
|
110 }
|
|
111
|
|
112 if (identifiers.size()==0){
|
|
113 String outStr = String.format("%s,%s,%s,%s,%s\n",obj.getESciDocId(),famName,givName,complName,"");
|
|
114
|
|
115
|
|
116 if (newTerms !=null){ // es existieren neue cone eintrage
|
|
117 if (newTerms.containsKey(obj.getESciDocId())){ // ergŠnzung existiert
|
|
118 for (List<String> entry : newTerms.get(obj.getESciDocId())){ //hole diese
|
|
119 if ( entry.get(0).equals(famName) && entry.get(1).equals(givName)){
|
|
120
|
|
121 int size= entry.size();
|
|
122 String coneId = entry.get(size-1);
|
|
123
|
|
124
|
|
125 Element newIdent = new Element("identifier", EScidocTools.DC);
|
|
126 Namespace ns = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
|
|
127 newIdent.setAttribute("type", "eterms:CONE",ns);
|
|
128 newIdent.setText(coneId);
|
|
129 person.addContent(newIdent);
|
|
130
|
|
131 //System.out.println(obj.printXML());
|
|
132 connector.updateItem(obj);
|
|
133 HttpResponse retValue = connector.submitAnObject(obj, "changed cone identifiers");
|
|
134 System.out.println(EScidocBasicHandler.convertStreamToString(retValue.getEntity().getContent()));
|
|
135 //
|
|
136 // System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent()));
|
|
137 //
|
|
138
|
|
139
|
|
140 HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
|
|
141 HttpEntity ent = resObj.getEntity();
|
|
142 if (ent!=null){
|
|
143 obj= new eSciDocXmlObject(ent.getContent());
|
|
144 } else {
|
|
145 System.out.println("Can not retrieve:" + obj.getESciDocId());
|
|
146 continue;
|
|
147 }
|
|
148
|
|
149
|
|
150 retValue = connector.releaseAnObject(obj, "changed cone identifiers");
|
|
151 System.out.println(EScidocBasicHandler.convertStreamToString(retValue.getEntity().getContent()));
|
|
152 }
|
|
153
|
|
154 }
|
|
155 }
|
|
156
|
|
157 }
|
|
158 out.write(outStr);
|
|
159 }
|
|
160
|
|
161 out.flush();
|
|
162
|
|
163
|
|
164 }
|
|
165
|
|
166
|
|
167 }
|
|
168
|
|
169
|
|
170 public HashMap<String,List<List<String>>> readEscidocToIdentifier() throws IOException{
|
|
171
|
|
172 HashMap<String,List<List<String>>> newTerms = new HashMap<String, List<List<String>>>();
|
|
173
|
21
|
174 String founds = readFileAsString("/tmp/found.csv");
|
19
|
175 String[] splitted = founds.split("\n");
|
|
176 //ir/item/escidoc:647775,McLaughlin,Peter,McLaughlin, Peter,http://pubman.mpiwg-berlin.mpg.de/cone/persons/resource/240
|
|
177 for (int i=0;i<splitted.length;i++){
|
|
178
|
|
179
|
|
180
|
|
181 String line = splitted[i];
|
|
182
|
|
183 String[] lineSplitted = line.split(",");
|
|
184
|
|
185
|
|
186
|
|
187
|
|
188
|
|
189 if (!newTerms.containsKey(lineSplitted[0])){
|
|
190 newTerms.put(lineSplitted[0], new ArrayList<List<String>>());
|
|
191 }
|
|
192
|
|
193 List<List<String>> content = newTerms.get(lineSplitted[0]);
|
|
194
|
|
195
|
|
196 ArrayList<String> entry = new ArrayList<String>();
|
|
197
|
|
198
|
|
199
|
|
200 for (int j=1;j<lineSplitted.length;j++){
|
|
201 entry.add(lineSplitted[j]);
|
|
202 }
|
|
203
|
|
204 content.add(entry);
|
|
205 }
|
|
206
|
|
207
|
|
208 return newTerms;
|
|
209
|
|
210 }
|
|
211
|
|
212
|
|
213 public void findAllPersons(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{
|
|
214 File file = new File("/tmp/ids.csv");
|
|
215
|
|
216 BufferedWriter out =
|
|
217 new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF8"));
|
|
218
|
|
219 Logger logger = Logger.getRootLogger();
|
|
220 logger.setLevel(Level.DEBUG);
|
|
221 EScidocBasicHandler connector = new EScidocBasicHandler(
|
21
|
222 "escidoc.mpiwg-berlin.mpg.de", 8080, "dwinter", "SSSS");
|
19
|
223
|
|
224 if(args.length<2){
|
|
225 System.out.println("Usage: startrecord maximumrecords");
|
|
226 System.exit(-1);
|
|
227 }
|
|
228 String MAX_REC = args[1];
|
|
229 String start = args[0];
|
|
230 String objectXPath = "//escidocItem:item";
|
|
231
|
|
232 String query = "?maximumRecords=" + String.valueOf(MAX_REC)
|
|
233 + "&startRecord=" + String.valueOf(start);
|
|
234 String command = "/ir/context/escidoc:38279/resources/members";
|
|
235 for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(
|
|
236 command + query, objectXPath)) {
|
|
237
|
|
238 System.out.println(obj.getESciDocId());
|
|
239 HashMap<String, List<List<String>>> newTerms = readEscidocToIdentifier();
|
|
240 findPersons(obj,out,newTerms,connector);
|
|
241
|
|
242
|
|
243
|
|
244 // if (changed){
|
|
245 //
|
|
246 // Boolean retVal = connector.updateItem(obj);
|
|
247 // System.out.println("Replaced:"+obj.getESciDocId());
|
|
248 // HttpResponse retValu = connector.submitAnObject(obj, "changed cone identifiers");
|
|
249 //
|
|
250 // System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent()));
|
|
251 // HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
|
|
252 // HttpEntity ent = resObj.getEntity();
|
|
253 // if (ent!=null){
|
|
254 // obj= new eSciDocXmlObject(ent.getContent());
|
|
255 // } else {
|
|
256 // System.out.println("Can not retrieve:" + obj.getESciDocId());
|
|
257 // continue;
|
|
258 // }
|
|
259 //
|
|
260 // HttpResponse reValue2 = connector.releaseAnObject(obj, "changed cone identifiers");
|
|
261 // System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent()));
|
|
262 //
|
|
263 //
|
|
264 // }
|
|
265 }
|
|
266 out.close();
|
|
267 }
|
|
268
|
|
269
|
|
270 private String readFileAsString(String filePath) throws IOException {
|
|
271 StringBuffer fileData = new StringBuffer();
|
|
272 BufferedReader reader = new BufferedReader(
|
21
|
273 new InputStreamReader(
|
|
274 new FileInputStream(filePath), "UTF8"));
|
19
|
275 char[] buf = new char[1024];
|
|
276 int numRead=0;
|
|
277 while((numRead=reader.read(buf)) != -1){
|
|
278 String readData = String.valueOf(buf, 0, numRead);
|
|
279 fileData.append(readData);
|
|
280 }
|
|
281 reader.close();
|
|
282 return fileData.toString();
|
|
283 }
|
|
284
|
|
285 public static void main(String[] args) throws IllegalStateException,
|
|
286 IOException, JDOMException, ESciDocXmlObjectException {
|
|
287
|
|
288
|
|
289 AddConeIds adders = new AddConeIds();
|
|
290
|
|
291 adders.findAllPersons(args);
|
|
292
|
|
293 }
|
|
294
|
|
295 }
|