Mercurial > hg > eSciDocImport
comparison src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/AddConeIds.java @ 19:671c6e3449f7
add coneids added
author | dwinter |
---|---|
date | Mon, 27 May 2013 15:06:53 +0200 |
parents | |
children | d1f63ee9998d |
comparison
equal
deleted
inserted
replaced
18:c201d58997e5 | 19:671c6e3449f7 |
---|---|
1 package de.mpiwg.itgroup.eSciDoc.Tools.Pubman; | |
2 | |
3 // Iterates over all entries and checks them for person IDs; | |
4 | |
5 // where IDs are missing they are added (call the Python tool addconeids to do so). | |
6 import java.io.BufferedReader; | |
7 import java.io.BufferedWriter; | |
8 import java.io.File; | |
9 import java.io.FileNotFoundException; | |
10 import java.io.FileOutputStream; | |
11 import java.io.FileReader; | |
12 import java.io.FileWriter; | |
13 import java.io.IOException; | |
14 import java.io.OutputStreamWriter; | |
15 import java.io.UnsupportedEncodingException; | |
16 import java.util.ArrayList; | |
17 import java.util.HashMap; | |
18 import java.util.List; | |
19 | |
20 import org.apache.http.HttpEntity; | |
21 import org.apache.http.HttpResponse; | |
22 import org.apache.log4j.Level; | |
23 import org.apache.log4j.Logger; | |
24 import org.jdom.Document; | |
25 import org.jdom.Element; | |
26 import org.jdom.JDOMException; | |
27 import org.jdom.Namespace; | |
28 import org.jdom.xpath.XPath; | |
29 | |
30 | |
31 import com.sun.xml.internal.xsom.impl.scd.Iterators.Map; | |
32 | |
33 import sun.security.krb5.internal.crypto.Nonce; | |
34 | |
35 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; | |
36 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; | |
37 import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException; | |
38 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; | |
39 | |
40 | |
41 | |
42 | |
43 public class AddConeIds { | |
44 | |
45 /** | |
46 * @param connector | |
47 * @param args | |
48 * @throws ESciDocXmlObjectException | |
49 * @throws JDOMException | |
50 * @throws IOException | |
51 * @throws IllegalStateException | |
52 */ | |
53 | |
54 | |
55 public void findPersons(eSciDocXmlObject obj,BufferedWriter out,HashMap<String,List<List<String>>>newTerms, EScidocBasicHandler connector) throws JDOMException, IOException, IllegalStateException, ESciDocXmlObjectException{ | |
56 | |
57 | |
58 Document doc = obj.getDocument(); | |
59 Boolean changed=false; | |
60 XPath personXPath = EScidocTools.getESciDocXpath("//person:person"); | |
61 @SuppressWarnings("unchecked") | |
62 List<Element> persons = personXPath.selectNodes(doc); | |
63 | |
64 XPath familyNameXpath = EScidocTools.getESciDocXpath(".//eterms:family-name"); | |
65 XPath givenNameXpath = EScidocTools.getESciDocXpath(".//eterms:given-name"); | |
66 XPath completeNameXpath = EScidocTools.getESciDocXpath(".//eterms:complete-name"); | |
67 | |
68 XPath identifierXpath = EScidocTools.getESciDocXpath("./dc:identifier"); | |
69 | |
70 | |
71 for (Element person : persons) { | |
72 | |
73 String famName=""; | |
74 | |
75 Element fam = (Element)familyNameXpath.selectSingleNode(person); | |
76 | |
77 if (fam != null){ | |
78 famName = fam.getTextTrim(); | |
79 } | |
80 | |
81 Element giv = (Element)givenNameXpath.selectSingleNode(person); | |
82 | |
83 String givName =""; | |
84 if (fam != null){ | |
85 givName = giv.getTextTrim(); | |
86 } | |
87 | |
88 Element compl = (Element)completeNameXpath.selectSingleNode(person); | |
89 | |
90 String complName=""; | |
91 if (fam != null){ | |
92 complName = compl.getTextTrim(); | |
93 } | |
94 | |
95 | |
96 List<Element> ids = (List<Element>)identifierXpath.selectNodes(person); | |
97 | |
98 | |
99 List<String>identifiers = new ArrayList<String> (); | |
100 for (Element id : ids){ | |
101 identifiers.add(id.getTextTrim()); | |
102 | |
103 } | |
104 | |
105 for (String identifier : identifiers){ | |
106 String outStr = String.format("%s,%s,%s,%s,%s\n",obj.getESciDocId(),famName,givName,complName,identifier); | |
107 out.write(outStr); | |
108 } | |
109 | |
110 if (identifiers.size()==0){ | |
111 String outStr = String.format("%s,%s,%s,%s,%s\n",obj.getESciDocId(),famName,givName,complName,""); | |
112 | |
113 | |
114 if (newTerms !=null){ // es existieren neue cone eintrage | |
115 if (newTerms.containsKey(obj.getESciDocId())){ // ergŠnzung existiert | |
116 for (List<String> entry : newTerms.get(obj.getESciDocId())){ //hole diese | |
117 if ( entry.get(0).equals(famName) && entry.get(1).equals(givName)){ | |
118 | |
119 int size= entry.size(); | |
120 String coneId = entry.get(size-1); | |
121 | |
122 | |
123 Element newIdent = new Element("identifier", EScidocTools.DC); | |
124 Namespace ns = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance"); | |
125 newIdent.setAttribute("type", "eterms:CONE",ns); | |
126 newIdent.setText(coneId); | |
127 person.addContent(newIdent); | |
128 | |
129 //System.out.println(obj.printXML()); | |
130 connector.updateItem(obj); | |
131 HttpResponse retValue = connector.submitAnObject(obj, "changed cone identifiers"); | |
132 System.out.println(EScidocBasicHandler.convertStreamToString(retValue.getEntity().getContent())); | |
133 // | |
134 // System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent())); | |
135 // | |
136 | |
137 | |
138 HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); | |
139 HttpEntity ent = resObj.getEntity(); | |
140 if (ent!=null){ | |
141 obj= new eSciDocXmlObject(ent.getContent()); | |
142 } else { | |
143 System.out.println("Can not retrieve:" + obj.getESciDocId()); | |
144 continue; | |
145 } | |
146 | |
147 | |
148 retValue = connector.releaseAnObject(obj, "changed cone identifiers"); | |
149 System.out.println(EScidocBasicHandler.convertStreamToString(retValue.getEntity().getContent())); | |
150 } | |
151 | |
152 } | |
153 } | |
154 | |
155 } | |
156 out.write(outStr); | |
157 } | |
158 | |
159 out.flush(); | |
160 | |
161 | |
162 } | |
163 | |
164 | |
165 } | |
166 | |
167 | |
168 public HashMap<String,List<List<String>>> readEscidocToIdentifier() throws IOException{ | |
169 | |
170 HashMap<String,List<List<String>>> newTerms = new HashMap<String, List<List<String>>>(); | |
171 | |
172 String founds = readFileAsString("./tmp/found.csv"); | |
173 String[] splitted = founds.split("\n"); | |
174 //ir/item/escidoc:647775,McLaughlin,Peter,McLaughlin, Peter,http://pubman.mpiwg-berlin.mpg.de/cone/persons/resource/240 | |
175 for (int i=0;i<splitted.length;i++){ | |
176 | |
177 | |
178 | |
179 String line = splitted[i]; | |
180 | |
181 String[] lineSplitted = line.split(","); | |
182 | |
183 | |
184 | |
185 | |
186 | |
187 if (!newTerms.containsKey(lineSplitted[0])){ | |
188 newTerms.put(lineSplitted[0], new ArrayList<List<String>>()); | |
189 } | |
190 | |
191 List<List<String>> content = newTerms.get(lineSplitted[0]); | |
192 | |
193 | |
194 ArrayList<String> entry = new ArrayList<String>(); | |
195 | |
196 | |
197 | |
198 for (int j=1;j<lineSplitted.length;j++){ | |
199 entry.add(lineSplitted[j]); | |
200 } | |
201 | |
202 content.add(entry); | |
203 } | |
204 | |
205 | |
206 return newTerms; | |
207 | |
208 } | |
209 | |
210 | |
211 public void findAllPersons(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{ | |
212 File file = new File("/tmp/ids.csv"); | |
213 | |
214 BufferedWriter out = | |
215 new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF8")); | |
216 | |
217 Logger logger = Logger.getRootLogger(); | |
218 logger.setLevel(Level.DEBUG); | |
219 EScidocBasicHandler connector = new EScidocBasicHandler( | |
220 "escidoc.mpiwg-berlin.mpg.de", 8080, "itgroup", "XXX"); | |
221 | |
222 if(args.length<2){ | |
223 System.out.println("Usage: startrecord maximumrecords"); | |
224 System.exit(-1); | |
225 } | |
226 String MAX_REC = args[1]; | |
227 String start = args[0]; | |
228 String objectXPath = "//escidocItem:item"; | |
229 | |
230 String query = "?maximumRecords=" + String.valueOf(MAX_REC) | |
231 + "&startRecord=" + String.valueOf(start); | |
232 String command = "/ir/context/escidoc:38279/resources/members"; | |
233 for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult( | |
234 command + query, objectXPath)) { | |
235 | |
236 System.out.println(obj.getESciDocId()); | |
237 HashMap<String, List<List<String>>> newTerms = readEscidocToIdentifier(); | |
238 findPersons(obj,out,newTerms,connector); | |
239 | |
240 | |
241 | |
242 // if (changed){ | |
243 // | |
244 // Boolean retVal = connector.updateItem(obj); | |
245 // System.out.println("Replaced:"+obj.getESciDocId()); | |
246 // HttpResponse retValu = connector.submitAnObject(obj, "changed cone identifiers"); | |
247 // | |
248 // System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent())); | |
249 // HttpResponse resObj = connector.eScidocGet(obj.getESciDocId()); | |
250 // HttpEntity ent = resObj.getEntity(); | |
251 // if (ent!=null){ | |
252 // obj= new eSciDocXmlObject(ent.getContent()); | |
253 // } else { | |
254 // System.out.println("Can not retrieve:" + obj.getESciDocId()); | |
255 // continue; | |
256 // } | |
257 // | |
258 // HttpResponse reValue2 = connector.releaseAnObject(obj, "changed cone identifiers"); | |
259 // System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent())); | |
260 // | |
261 // | |
262 // } | |
263 } | |
264 out.close(); | |
265 } | |
266 | |
267 | |
268 private String readFileAsString(String filePath) throws IOException { | |
269 StringBuffer fileData = new StringBuffer(); | |
270 BufferedReader reader = new BufferedReader( | |
271 new FileReader(filePath)); | |
272 char[] buf = new char[1024]; | |
273 int numRead=0; | |
274 while((numRead=reader.read(buf)) != -1){ | |
275 String readData = String.valueOf(buf, 0, numRead); | |
276 fileData.append(readData); | |
277 } | |
278 reader.close(); | |
279 return fileData.toString(); | |
280 } | |
281 | |
282 public static void main(String[] args) throws IllegalStateException, | |
283 IOException, JDOMException, ESciDocXmlObjectException { | |
284 | |
285 | |
286 AddConeIds adders = new AddConeIds(); | |
287 | |
288 adders.findAllPersons(args); | |
289 | |
290 } | |
291 | |
292 } |