Mercurial > hg > mpdl-group
comparison software/mpdl-services/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/harvester/PathExtractor.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
22:6a45a982c333 | 23:e845310098ba |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.cms.harvester; | |
2 | |
3 import java.io.File; | |
4 import java.io.IOException; | |
5 import java.util.ArrayList; | |
6 import java.util.List; | |
7 | |
8 import javax.xml.stream.XMLInputFactory; | |
9 import javax.xml.stream.XMLStreamConstants; | |
10 import javax.xml.stream.XMLStreamException; | |
11 import javax.xml.stream.XMLStreamReader; | |
12 | |
13 import org.apache.http.HttpEntity; | |
14 import org.apache.http.HttpResponse; | |
15 import org.apache.http.client.HttpClient; | |
16 import org.apache.http.client.methods.HttpGet; | |
17 import org.apache.http.impl.client.DefaultHttpClient; | |
18 | |
19 public class PathExtractor { | |
20 private List<String> ressourceLoc; | |
21 String excludes; | |
22 | |
23 public PathExtractor() { | |
24 | |
25 } | |
26 | |
27 public List<String> initExtractor(String startingUri, String excludes) { | |
28 this.excludes = excludes; | |
29 ressourceLoc = new ArrayList<String>(); | |
30 // parameter necessery, because it's recursive, thus changing the uri | |
31 extractDocLocations(startingUri); | |
32 System.out.println("extracing resource locations done."); | |
33 return this.ressourceLoc; | |
34 } | |
35 | |
36 /** | |
37 * recursive Method to extract the path of the resources | |
38 * | |
39 * @param startUrl | |
40 */ | |
41 private void extractDocLocations(String startUrl) { | |
42 HttpClient client = new DefaultHttpClient(); | |
43 HttpGet httpget = new HttpGet(startUrl); | |
44 HttpResponse resp = null; | |
45 try { | |
46 resp = client.execute(httpget); | |
47 } catch (IOException e) { | |
48 e.printStackTrace(); | |
49 } | |
50 HttpEntity entity = resp.getEntity(); | |
51 if (entity != null) { | |
52 XMLInputFactory iFactory = XMLInputFactory.newInstance(); | |
53 XMLStreamReader reader = null; | |
54 try { | |
55 reader = iFactory.createXMLStreamReader(entity.getContent()); | |
56 } catch (IllegalStateException e1) { | |
57 e1.printStackTrace(); | |
58 } catch (XMLStreamException e1) { | |
59 e1.printStackTrace(); | |
60 } catch (IOException e1) { | |
61 e1.printStackTrace(); | |
62 } | |
63 | |
64 try { | |
65 while (true) { | |
66 int event = reader.next(); | |
67 if (event == XMLStreamConstants.END_DOCUMENT) { | |
68 reader.close(); | |
69 break; | |
70 } | |
71 if (event == XMLStreamConstants.START_ELEMENT) { | |
72 if ((reader.getAttributeValue(null, "name")) != null) { | |
73 if (reader.getLocalName().equals("collection") && !(startUrl.endsWith(reader.getAttributeValue(null, "name")))) { | |
74 if(!(this.excludes.contains(reader.getAttributeValue(null, "name").toLowerCase()))){ | |
75 if (reader.getAttributeValue(null, "name").startsWith("/")) { | |
76 client.getConnectionManager().closeExpiredConnections(); | |
77 extractDocLocations(startUrl + reader.getAttributeValue(null, "name")); | |
78 } else { | |
79 client.getConnectionManager().closeExpiredConnections(); | |
80 if (!startUrl.endsWith("/")) { | |
81 extractDocLocations(startUrl + "/" + reader.getAttributeValue(null, "name")); | |
82 } else { | |
83 extractDocLocations(startUrl + reader.getAttributeValue(null, "name")); | |
84 } | |
85 } | |
86 } | |
87 } | |
88 if (reader.getLocalName().equals("resource")) { | |
89 if (!startUrl.endsWith("/")) { | |
90 ressourceLoc.add(startUrl + "/" + reader.getAttributeValue(null, "name")); | |
91 } else { | |
92 ressourceLoc.add(startUrl + reader.getAttributeValue(null, "name")); | |
93 } | |
94 } | |
95 } | |
96 } | |
97 if (event == XMLStreamConstants.ATTRIBUTE) { | |
98 // System.out.println("localName : "+reader.getLocalName()); | |
99 } | |
100 } | |
101 } catch (XMLStreamException e) { | |
102 e.printStackTrace(); | |
103 } | |
104 } | |
105 } | |
106 | |
107 /** | |
108 * extrahiert ebenso wie extractDocLocations(String startUri) Pfade, tut dies | |
109 * aber local und nicht über HTTP | |
110 * | |
111 * @return | |
112 */ | |
113 public List<String> extractPathLocally(String startUrl) { | |
114 List<String> pathList = new ArrayList<String>(); | |
115 | |
116 // home verzeichnis pfad über system variable | |
117 // String loc = System.getenv("HOME")+"/wsp/configs"; | |
118 // out.println("hom variable + conf datei : "+loc); | |
119 File f = new File(startUrl); | |
120 // out.println("readable : "+Boolean.toString(f.canRead())); | |
121 // out.println("readable : "+f.isDirectory()); | |
122 if (f.isDirectory()) { | |
123 File[] filelist = f.listFiles(); | |
124 for (File file : filelist) { | |
125 if (file.getName().toLowerCase().contains("config")) { | |
126 if (!startUrl.endsWith("/")) { | |
127 pathList.add(startUrl + "/" + file.getName()); | |
128 } else { | |
129 pathList.add(startUrl + file.getName()); | |
130 } | |
131 } | |
132 } | |
133 } | |
134 return pathList; | |
135 } | |
136 | |
137 } |