comparison src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java @ 19:671c6e3449f7

add coneids added
author dwinter
date Mon, 27 May 2013 15:06:53 +0200
parents
children d1f63ee9998d
comparison
equal deleted inserted replaced
18:c201d58997e5 19:671c6e3449f7
1 package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;
2
3 // Iterates over all entries and checks them for person ids.
4
5 // If necessary, the ids are added (to do so, call the Python tool addconeids).
6 import java.io.BufferedReader;
7 import java.io.BufferedWriter;
8 import java.io.File;
9 import java.io.FileNotFoundException;
10 import java.io.FileOutputStream;
11 import java.io.FileReader;
12 import java.io.FileWriter;
13 import java.io.IOException;
14 import java.io.OutputStreamWriter;
15 import java.io.UnsupportedEncodingException;
16 import java.util.ArrayList;
17 import java.util.HashMap;
18 import java.util.List;
19
20 import org.apache.http.HttpEntity;
21 import org.apache.http.HttpResponse;
22 import org.apache.log4j.Level;
23 import org.apache.log4j.Logger;
24 import org.jdom.Document;
25 import org.jdom.Element;
26 import org.jdom.JDOMException;
27 import org.jdom.Namespace;
28 import org.jdom.xpath.XPath;
29
30
31 import com.sun.xml.internal.xsom.impl.scd.Iterators.Map;
32
33 import sun.security.krb5.internal.crypto.Nonce;
34
35 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
36 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
37 import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
38 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
39
40
41
42
43 public class AddConeIds {
44
45 /**
46 * @param connector
47 * @param args
48 * @throws ESciDocXmlObjectException
49 * @throws JDOMException
50 * @throws IOException
51 * @throws IllegalStateException
52 */
53
54
55 public void findPersons(eSciDocXmlObject obj,BufferedWriter out,HashMap<String,List<List<String>>>newTerms, EScidocBasicHandler connector) throws JDOMException, IOException, IllegalStateException, ESciDocXmlObjectException{
56
57
58 Document doc = obj.getDocument();
59 Boolean changed=false;
60 XPath personXPath = EScidocTools.getESciDocXpath("//person:person");
61 @SuppressWarnings("unchecked")
62 List<Element> persons = personXPath.selectNodes(doc);
63
64 XPath familyNameXpath = EScidocTools.getESciDocXpath(".//eterms:family-name");
65 XPath givenNameXpath = EScidocTools.getESciDocXpath(".//eterms:given-name");
66 XPath completeNameXpath = EScidocTools.getESciDocXpath(".//eterms:complete-name");
67
68 XPath identifierXpath = EScidocTools.getESciDocXpath("./dc:identifier");
69
70
71 for (Element person : persons) {
72
73 String famName="";
74
75 Element fam = (Element)familyNameXpath.selectSingleNode(person);
76
77 if (fam != null){
78 famName = fam.getTextTrim();
79 }
80
81 Element giv = (Element)givenNameXpath.selectSingleNode(person);
82
83 String givName ="";
84 if (fam != null){
85 givName = giv.getTextTrim();
86 }
87
88 Element compl = (Element)completeNameXpath.selectSingleNode(person);
89
90 String complName="";
91 if (fam != null){
92 complName = compl.getTextTrim();
93 }
94
95
96 List<Element> ids = (List<Element>)identifierXpath.selectNodes(person);
97
98
99 List<String>identifiers = new ArrayList<String> ();
100 for (Element id : ids){
101 identifiers.add(id.getTextTrim());
102
103 }
104
105 for (String identifier : identifiers){
106 String outStr = String.format("%s,%s,%s,%s,%s\n",obj.getESciDocId(),famName,givName,complName,identifier);
107 out.write(outStr);
108 }
109
110 if (identifiers.size()==0){
111 String outStr = String.format("%s,%s,%s,%s,%s\n",obj.getESciDocId(),famName,givName,complName,"");
112
113
114 if (newTerms !=null){ // es existieren neue cone eintrage
115 if (newTerms.containsKey(obj.getESciDocId())){ // ergŠnzung existiert
116 for (List<String> entry : newTerms.get(obj.getESciDocId())){ //hole diese
117 if ( entry.get(0).equals(famName) && entry.get(1).equals(givName)){
118
119 int size= entry.size();
120 String coneId = entry.get(size-1);
121
122
123 Element newIdent = new Element("identifier", EScidocTools.DC);
124 Namespace ns = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
125 newIdent.setAttribute("type", "eterms:CONE",ns);
126 newIdent.setText(coneId);
127 person.addContent(newIdent);
128
129 //System.out.println(obj.printXML());
130 connector.updateItem(obj);
131 HttpResponse retValue = connector.submitAnObject(obj, "changed cone identifiers");
132 System.out.println(EScidocBasicHandler.convertStreamToString(retValue.getEntity().getContent()));
133 //
134 // System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent()));
135 //
136
137
138 HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
139 HttpEntity ent = resObj.getEntity();
140 if (ent!=null){
141 obj= new eSciDocXmlObject(ent.getContent());
142 } else {
143 System.out.println("Can not retrieve:" + obj.getESciDocId());
144 continue;
145 }
146
147
148 retValue = connector.releaseAnObject(obj, "changed cone identifiers");
149 System.out.println(EScidocBasicHandler.convertStreamToString(retValue.getEntity().getContent()));
150 }
151
152 }
153 }
154
155 }
156 out.write(outStr);
157 }
158
159 out.flush();
160
161
162 }
163
164
165 }
166
167
168 public HashMap<String,List<List<String>>> readEscidocToIdentifier() throws IOException{
169
170 HashMap<String,List<List<String>>> newTerms = new HashMap<String, List<List<String>>>();
171
172 String founds = readFileAsString("./tmp/found.csv");
173 String[] splitted = founds.split("\n");
174 //ir/item/escidoc:647775,McLaughlin,Peter,McLaughlin, Peter,http://pubman.mpiwg-berlin.mpg.de/cone/persons/resource/240
175 for (int i=0;i<splitted.length;i++){
176
177
178
179 String line = splitted[i];
180
181 String[] lineSplitted = line.split(",");
182
183
184
185
186
187 if (!newTerms.containsKey(lineSplitted[0])){
188 newTerms.put(lineSplitted[0], new ArrayList<List<String>>());
189 }
190
191 List<List<String>> content = newTerms.get(lineSplitted[0]);
192
193
194 ArrayList<String> entry = new ArrayList<String>();
195
196
197
198 for (int j=1;j<lineSplitted.length;j++){
199 entry.add(lineSplitted[j]);
200 }
201
202 content.add(entry);
203 }
204
205
206 return newTerms;
207
208 }
209
210
211 public void findAllPersons(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{
212 File file = new File("/tmp/ids.csv");
213
214 BufferedWriter out =
215 new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF8"));
216
217 Logger logger = Logger.getRootLogger();
218 logger.setLevel(Level.DEBUG);
219 EScidocBasicHandler connector = new EScidocBasicHandler(
220 "escidoc.mpiwg-berlin.mpg.de", 8080, "itgroup", "XXX");
221
222 if(args.length<2){
223 System.out.println("Usage: startrecord maximumrecords");
224 System.exit(-1);
225 }
226 String MAX_REC = args[1];
227 String start = args[0];
228 String objectXPath = "//escidocItem:item";
229
230 String query = "?maximumRecords=" + String.valueOf(MAX_REC)
231 + "&startRecord=" + String.valueOf(start);
232 String command = "/ir/context/escidoc:38279/resources/members";
233 for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(
234 command + query, objectXPath)) {
235
236 System.out.println(obj.getESciDocId());
237 HashMap<String, List<List<String>>> newTerms = readEscidocToIdentifier();
238 findPersons(obj,out,newTerms,connector);
239
240
241
242 // if (changed){
243 //
244 // Boolean retVal = connector.updateItem(obj);
245 // System.out.println("Replaced:"+obj.getESciDocId());
246 // HttpResponse retValu = connector.submitAnObject(obj, "changed cone identifiers");
247 //
248 // System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent()));
249 // HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
250 // HttpEntity ent = resObj.getEntity();
251 // if (ent!=null){
252 // obj= new eSciDocXmlObject(ent.getContent());
253 // } else {
254 // System.out.println("Can not retrieve:" + obj.getESciDocId());
255 // continue;
256 // }
257 //
258 // HttpResponse reValue2 = connector.releaseAnObject(obj, "changed cone identifiers");
259 // System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent()));
260 //
261 //
262 // }
263 }
264 out.close();
265 }
266
267
268 private String readFileAsString(String filePath) throws IOException {
269 StringBuffer fileData = new StringBuffer();
270 BufferedReader reader = new BufferedReader(
271 new FileReader(filePath));
272 char[] buf = new char[1024];
273 int numRead=0;
274 while((numRead=reader.read(buf)) != -1){
275 String readData = String.valueOf(buf, 0, numRead);
276 fileData.append(readData);
277 }
278 reader.close();
279 return fileData.toString();
280 }
281
282 public static void main(String[] args) throws IllegalStateException,
283 IOException, JDOMException, ESciDocXmlObjectException {
284
285
286 AddConeIds adders = new AddConeIds();
287
288 adders.findAllPersons(args);
289
290 }
291
292 }