annotate src/de/mpiwg/itgroup/eSciDoc/Tools/AddMPIWGIdentifiers.java @ 8:a844f6948dd8

Änderungen im Walker tools für pubman
author dwinter
date Mon, 14 May 2012 09:58:45 +0200
parents
children 9164f3f4b232
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.eSciDoc.Tools;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
2
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
3 import java.io.IOException;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
4 import java.util.HashMap;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
5 import java.util.List;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
6
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
7 import org.apache.http.HttpEntity;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
8 import org.apache.http.HttpResponse;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
9 import org.apache.http.client.ClientProtocolException;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
10 import org.apache.log4j.BasicConfigurator;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
11 import org.apache.log4j.Level;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
12 import org.apache.log4j.Logger;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
13 import org.jdom.Attribute;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
14 import org.jdom.Document;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
15 import org.jdom.Element;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
16 import org.jdom.JDOMException;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
17 import org.jdom.input.SAXBuilder;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
18 import org.jdom.xpath.XPath;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
19
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
20 import sun.util.logging.resources.logging;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
21
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
22 import com.sun.org.apache.xerces.internal.parsers.SAXParser;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
23
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
24 import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
25 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
26
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
27 public class AddMPIWGIdentifiers {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
28
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
29 /**
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
30 * Erzeuge MPIWG identifier in den administrativen MPIWG Metadaten.
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
31 * Solange der Identifier nicht in den Metadaten (index.meta) steht, wird dazu der Filename genommen.
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
32 * @param args
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
33 * @throws ESciDocXmlObjectException
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
34 * @throws JDOMException
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
35 * @throws IOException
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
36 * @throws ClientProtocolException
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
37 * @throws IllegalStateException
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
38 */
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
39 public static void main(String[] args) throws IllegalStateException, ClientProtocolException, IOException, JDOMException, ESciDocXmlObjectException {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
40 // TODO Auto-generated method stub
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
41 BasicConfigurator.configure();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
42 Logger logger = Logger.getRootLogger();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
43 logger.setLevel(Level.INFO);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
44 EScidocBasicHandler connector = new EScidocBasicHandler(
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
45 "escidoc-test.mpiwg-berlin.mpg.de", 8080, "dwinter", "fl0rian");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
46
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
47 if(args.length<2){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
48 System.out.println("Usage: startrecord maximumrecords");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
49 System.exit(-1);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
50 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
51 String MAX_REC = args[1];
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
52 String start = args[0];
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
53 String objectXPath = "//escidocItem:item";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
54
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
55 String query = "?maximumRecords=" + String.valueOf(MAX_REC)
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
56 + "&startRecord=" + String.valueOf(start);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
57 String command = "/ir/context/escidoc:12001/resources/members";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
58 for (eSciDocXmlObject obj : connector.getObjectsFromFilterResult(
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
59 command + query, objectXPath)) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
60
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
61 Document doc = obj.getDocument();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
62 Boolean changed=false;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
63 XPath archivePathXP = EScidocTools.getESciDocXpath("//mpiwg:admin/mpiwg:archivePath");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
64 XPath mpiwgMDXP= EScidocTools.getESciDocXpath("//mpiwg:admin");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
65 XPath mpiwgIdentifierXP= EScidocTools.getESciDocXpath("//mpiwg:admin/mpiwg:identifier");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
66
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
67
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
68 Element archivePathNode = (Element) archivePathXP.selectSingleNode(doc);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
69 if(archivePathNode==null) // kein Treffer
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
70 continue;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
71
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
72 Element mpiwgIDNode = (Element) mpiwgIdentifierXP.selectSingleNode(doc);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
73 if(mpiwgIDNode!=null){ // kein Treffer
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
74 System.out.println("ID already exists:"+mpiwgIDNode.getTextTrim());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
75 continue;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
76 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
77
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
78
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
79
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
80
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
81 String archivePath=archivePathNode.getTextTrim();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
82 String[] pathElements=archivePath.split("/");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
83 int length=pathElements.length;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
84 String mpiwgID=pathElements[length-1];
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
85 Element mpiwgMDNode= (Element) mpiwgMDXP.selectSingleNode(doc);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
86
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
87 if(mpiwgID.equals(""))
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
88 mpiwgID=handleEmptyID(doc,mpiwgMDNode);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
89
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
90
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
91
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
92
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
93 mpiwgIDNode= new Element("identifier", EScidocTools.MPIWG);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
94 mpiwgIDNode.setText(mpiwgID);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
95 mpiwgMDNode.addContent(mpiwgIDNode);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
96
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
97 //correct release number, some objecte don't have an release number, why??
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
98
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
99
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
100 Element latestRelease = (Element)EScidocBasicHandler.getXPath(doc.getRootElement(),"//escidocItem:properties/prop:latest-release/release:number",true);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
101 if(latestRelease==null){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
102 logger.info("not released yet:"+obj.getESciDocId());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
103 } else {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
104 String rn= latestRelease.getTextTrim();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
105 if (rn.equals("")){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
106 String latestVersionNumber = ((Element)EScidocBasicHandler.getXPath(doc.getRootElement(),"//escidocItem:properties/prop:version/version:number",true)).getTextTrim();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
107 logger.info("have to add relase number!");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
108 logger.info("Will add:"+latestVersionNumber);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
109
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
110 latestRelease.setText(latestVersionNumber);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
111 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
112 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
113 System.out.println("Adding:"+mpiwgID);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
114 changed=true;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
115
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
116
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
117 //System.out.println(obj.printXML());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
118 if (changed){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
119
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
120 Boolean retVal = connector.updateItem(obj);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
121 System.out.println("Replaced:"+obj.getESciDocId());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
122 HttpResponse retValu = connector.submitAnObject(obj, "adding identifier");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
123
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
124 System.out.println(EScidocBasicHandler.convertStreamToString(retValu.getEntity().getContent()));
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
125 HttpResponse resObj = connector.eScidocGet(obj.getESciDocId());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
126 HttpEntity ent = resObj.getEntity();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
127 if (ent!=null){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
128 obj= new eSciDocXmlObject(ent.getContent());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
129 } else {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
130 System.out.println("Can not retrieve:" + obj.getESciDocId());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
131 continue;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
132 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
133
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
134 HttpResponse reValue2 = connector.releaseAnObject(obj, "repairing publishing info");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
135 System.out.println(EScidocBasicHandler.convertStreamToString(reValue2.getEntity().getContent()));
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
136
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
137 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
138 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
139
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
140 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
141
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
142 private static String handleEmptyID(Document doc, Element mpiwgMDNode) throws JDOMException, IOException {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
143 XPath xlinkXP = EScidocTools.getESciDocXpath("//escidocComponents:component[escidocComponents:properties/prop:content-category/text()='index_meta']/@xlink:href");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
144
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
145 SAXBuilder sb = new SAXBuilder();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
146 Object node = xlinkXP.selectSingleNode(doc);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
147 Attribute attr =(Attribute) node;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
148 String md = attr.getValue();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
149
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
150 Document doc2 = sb.build("http://escidoc-test.mpiwg-berlin.mpg.de:8080"+md+"/content");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
151 XPath apXP= EScidocTools.getESciDocXpath("//resource/archive-path");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
152 XPath imXP= EScidocTools.getESciDocXpath("//texttool/image");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
153
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
154 Element apNode = (Element)apXP.selectSingleNode(doc2);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
155 String ap="";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
156 if(apNode!=null)
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
157 {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
158 ap = apNode.getTextTrim();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
159 XPath apNewXP= EScidocTools.getESciDocXpath(".//mpiwg:archivePath");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
160 Element apNewNode = (Element) apNewXP.selectSingleNode(mpiwgMDNode);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
161 apNewNode.setText(ap);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
162 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
163 Element imNode = (Element)imXP.selectSingleNode(doc2);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
164
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
165 if(imNode!=null)
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
166 {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
167 String im = imNode.getTextTrim();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
168 XPath imNewXP= EScidocTools.getESciDocXpath(".//mpiwg:imageFolder");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
169 Element imNewNode = (Element) imNewXP.selectSingleNode(mpiwgMDNode);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
170 imNewNode.setText(ap+"/"+im);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
171 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
172
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
173 String[] pathElements=ap.split("/");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
174 int length=pathElements.length;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
175 return pathElements[length-1];
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
176
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
177
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
178 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
179 }