annotate src/de/mpiwg/itgroup/eSciDoc/Tools/Pubman/PubmanFoxridgeIdentifierRelationModell.java @ 15:e0efd3a9d2f0

changes for escidoc1.4
author dwinter
date Mon, 17 Sep 2012 10:21:04 +0200
parents a844f6948dd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.eSciDoc.Tools.Pubman;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
2
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
3 import java.io.File;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
4 import java.io.FileWriter;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
5 import java.io.IOException;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
6 import java.net.URLEncoder;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
7 import java.util.HashSet;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
8 import java.util.List;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
9 import java.util.Set;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
10 import java.util.regex.Matcher;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
11 import java.util.regex.Pattern;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
12
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
13 import javax.xml.parsers.SAXParser;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
14
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
15 import org.apache.http.HttpEntity;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
16 import org.apache.http.HttpResponse;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
17 import org.apache.http.client.ClientProtocolException;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
18 import org.apache.http.client.methods.HttpGet;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
19 import org.apache.http.impl.client.DefaultHttpClient;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
20 import org.apache.log4j.BasicConfigurator;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
21 import org.apache.log4j.Level;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
22 import org.apache.log4j.Logger;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
23 import org.jdom.Document;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
24 import org.jdom.Element;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
25 import org.jdom.JDOMException;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
26 import org.jdom.input.SAXBuilder;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
27 import org.jdom.xpath.XPath;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
28
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
29 import sun.util.logging.resources.logging;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
30
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
31 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
32 import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
33 import de.mpiwg.itgroup.eSciDoc.exceptions.ESciDocXmlObjectException;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
34 import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
35
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
36 /**
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
37 * Diese Klasse sucht aus den Pfaden im Pubman Eintrag der URL zu den Quellen den Identifier heraus und findet
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
38 * den zugehörigen Eintrag in den von der Foxridge in eScidoc geharvesten Metadaten.
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
39 *
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
40 * Daraus wird dann ein Container erstellt.
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
41 * @author dwinter
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
42 *
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
43 */
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
44 public class PubmanFoxridgeIdentifierRelationModell {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
45
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
46 Logger logger = Logger.getRootLogger();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
47 public PubmanFoxridgeIdentifierRelationModell(){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
48
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
49 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
50
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
51 public static void main(String[] args) throws IllegalStateException, IOException, JDOMException, ESciDocXmlObjectException{
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
52 String dateString="2011-06-08T11:48:50.287Z";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
53
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
54 String itemString="/ir/item/escidoc:162177";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
55
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
56 String escidocServer="escidoc-test.mpiwg-berlin.mpg.de";
15
e0efd3a9d2f0 changes for escidoc1.4
dwinter
parents: 8
diff changeset
57 EScidocBasicHandler hd = new EScidocBasicHandler(escidocServer, 8080, "dwinter", "XXX");
8
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
58
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
59 if (args.length>0){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
60 dateString=args[0];
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
61 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
62
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
63 if (!hd.isCurrent(itemString,dateString)){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
64 System.err.println("not the last version!");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
65 System.exit(0);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
66 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
67
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
68
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
69 List<String> existingRelations = hd.getIdsOfRelationFromObject(itemString,"http://www.escidoc.de/ontologies/mpdl-ontologies/content-relations#hasMember");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
70
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
71 String contextMembers="http://escidoc.mpiwg-berlin.mpg.de:8080/ir/context/escidoc:55281/resources/members";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
72
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
73
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
74
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
75 BasicConfigurator.configure();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
76 Logger.getRootLogger().setLevel(Level.ERROR);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
77
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
78 PubmanFoxridgeIdentifierRelationModell pi = new PubmanFoxridgeIdentifierRelationModell();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
79
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
80 Set<String> identifiers = pi.getIdentifiersFromPubmanPath(contextMembers);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
81
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
82 Set <String> ids = new HashSet<String>();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
83 int counter=0;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
84 for (String id: identifiers){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
85
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
86
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
87 String command=String.format("/ir/items?maximumRecords=1&operation=searchRetrieve&version=1.1&query=%%22%%2Fmd-records%%2Fmd-record%%2Fadmin%%2Fidentifier%%22%%3D%%22%s%%22", id);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
88 //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:1001%22";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
89 //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:40001%22";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
90 //command+="%20and%20%22%2Fproperties%2Fcontext%2Fid%22%3d%22escidoc:12001%22";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
91 System.out.println(command);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
92 List<eSciDocXmlObject> objects = hd.getObjectsFromFilterResult(command, "/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
93 System.out.println("found");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
94 for (eSciDocXmlObject obj:objects){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
95 System.out.println("adding:");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
96
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
97 String addObjId = obj.getESciDocId();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
98 if(existingRelations.contains(addObjId)){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
99 System.out.println("already in relations:"+addObjId);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
100 } else {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
101
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
102 ids.add(addObjId.replace("/ir/item/", ""));
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
103 counter+=1;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
104 System.out.println("adding:"+obj.getESciDocId());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
105 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
106 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
107 //if (counter>10)
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
108 // break;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
109
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
110 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
111 String addMemberXML="<param last-modification-date=\""+dateString+"\">";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
112 for(String id: ids){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
113 addMemberXML+="<relation>"+
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
114 "<targetId>"+id+"</targetId>"+
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
115 "<predicate>http://www.escidoc.de/ontologies/mpdl-ontologies/content-relations#hasMember</predicate>" +
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
116 " </relation>";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
117
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
118 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
119 addMemberXML+="</param>";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
120
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
121 System.out.println("addmemberXML:"+addMemberXML);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
122 File addFile = new File("/tmp/add.txt");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
123 FileWriter fw = new FileWriter(addFile);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
124 fw.write(addMemberXML);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
125 fw.close();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
126
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
127 //HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161163/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
128 //HttpResponse res = hd.eScidocPost("/ir/container/escidoc:161164/members/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
129 HttpResponse res = hd.eScidocPost(itemString+"/content-relations/add", EScidocBasicHandler.convertStringToStream(addMemberXML));
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
130 System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent()));
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
131 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
132
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
133 private Set<String> getIdentifiersFromPubmanPath(String contextMembers) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
134 HttpGet get = new HttpGet(contextMembers);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
135 DefaultHttpClient httpclient = new DefaultHttpClient();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
136 Set<String> retSet = new HashSet<String>();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
137 HttpResponse response;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
138 try {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
139 response = httpclient.execute(get);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
140 } catch (ClientProtocolException e1) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
141 // TODO Auto-generated catch block
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
142 e1.printStackTrace();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
143 return null;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
144 } catch (IOException e1) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
145 // TODO Auto-generated catch block
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
146 e1.printStackTrace();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
147 return null;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
148 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
149 if (response.getStatusLine().getStatusCode()>200){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
150 logger.error(contextMembers);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
151 logger.error(response.getStatusLine().getReasonPhrase());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
152 return null;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
153 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
154
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
155 HttpEntity ent = response.getEntity();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
156
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
157 SAXBuilder builder = new SAXBuilder();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
158 Document doc;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
159 try {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
160 doc = builder.build(ent.getContent());
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
161 } catch (IllegalStateException e) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
162 // TODO Auto-generated catch block
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
163 e.printStackTrace();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
164 return null;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
165 } catch (JDOMException e) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
166 // TODO Auto-generated catch block
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
167 e.printStackTrace();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
168 return null;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
169 } catch (IOException e) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
170 // TODO Auto-generated catch block
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
171 e.printStackTrace();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
172 return null;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
173 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
174 XPath xpath=null;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
175 try {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
176 String xpathString="/zs:searchRetrieveResponse/zs:records/zs:record/zs:recordData/escidocItem:item/metadataRecords:md-records/metadataRecords:md-record/publication:publication/dc:identifier";
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
177 xpath = EScidocTools.getESciDocXpath(xpathString);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
178 } catch (JDOMException e) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
179 // TODO Auto-generated catch block
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
180 e.printStackTrace();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
181 return null;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
182 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
183 List<Element> nodes;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
184 try {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
185 nodes= xpath.selectNodes(doc);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
186 } catch (JDOMException e) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
187 // TODO Auto-generated catch block
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
188 e.printStackTrace();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
189 return null;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
190 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
191 File outAdd = new File("/tmp/outadded.txt");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
192 try {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
193 FileWriter fw = new FileWriter(outAdd);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
194 for (Element el: nodes){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
195 String text = el.getTextTrim();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
196 Pattern x = Pattern.compile("/library/([^/]*)");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
197 logger.debug("found:"+text);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
198 Matcher m = x.matcher(text);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
199 boolean matched = m.find();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
200 if (matched){
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
201 String id = m.group(1);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
202 logger.debug("adding:"+id);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
203 retSet.add(id);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
204 fw.write(id+"\n");
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
205 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
206
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
207 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
208 fw.close();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
209 } catch (IOException e) {
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
210 // TODO Auto-generated catch block
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
211 e.printStackTrace();
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
212 System.exit(0);
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
213 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
214 return retSet;
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
215 }
a844f6948dd8 ?nderungen im Walker
dwinter
parents:
diff changeset
216 }