diff software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/harvester/PathExtractor.java @ 25:e9fe3186670c default tip

letzter Stand eingecheckt
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 21 May 2013 10:19:32 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/harvester/PathExtractor.java	Tue May 21 10:19:32 2013 +0200
@@ -0,0 +1,137 @@
+package de.mpg.mpiwg.berlin.mpdl.cms.harvester;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.DefaultHttpClient;
+
+public class PathExtractor {
+  private List<String> ressourceLoc;
+  String excludes;
+
+  public PathExtractor() {
+
+  }
+
+  public List<String> initExtractor(String startingUri, String excludes) {
+    this.excludes = excludes;
+    ressourceLoc = new ArrayList<String>();
+    // parameter necessery, because it's recursive, thus changing the uri
+    extractDocLocations(startingUri);
+    System.out.println("extracing resource locations done.");
+    return this.ressourceLoc;
+  }
+
+  /**
+   * recursive Method to extract the path of the resources
+   * 
+   * @param startUrl
+   */
+  private void extractDocLocations(String startUrl) {
+    HttpClient client = new DefaultHttpClient();
+    HttpGet httpget = new HttpGet(startUrl);
+    HttpResponse resp = null;
+    try {
+      resp = client.execute(httpget);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    HttpEntity entity = resp.getEntity();
+    if (entity != null) {
+      XMLInputFactory iFactory = XMLInputFactory.newInstance();
+      XMLStreamReader reader = null;
+      try {
+        reader = iFactory.createXMLStreamReader(entity.getContent());
+      } catch (IllegalStateException e1) {
+        e1.printStackTrace();
+      } catch (XMLStreamException e1) {
+        e1.printStackTrace();
+      } catch (IOException e1) {
+        e1.printStackTrace();
+      }
+
+      try {
+        while (true) {
+          int event = reader.next();
+          if (event == XMLStreamConstants.END_DOCUMENT) {
+            reader.close();
+            break;
+          }
+          if (event == XMLStreamConstants.START_ELEMENT) {
+            if ((reader.getAttributeValue(null, "name")) != null) {
+              if (reader.getLocalName().equals("collection") && !(startUrl.endsWith(reader.getAttributeValue(null, "name")))) {
+                if(!(this.excludes.contains(reader.getAttributeValue(null, "name").toLowerCase()))){
+                  if (reader.getAttributeValue(null, "name").startsWith("/")) {
+                    client.getConnectionManager().closeExpiredConnections();
+                    extractDocLocations(startUrl + reader.getAttributeValue(null, "name"));
+                  } else {
+                    client.getConnectionManager().closeExpiredConnections();
+                    if (!startUrl.endsWith("/")) {
+                      extractDocLocations(startUrl + "/" + reader.getAttributeValue(null, "name"));
+                    } else {
+                      extractDocLocations(startUrl + reader.getAttributeValue(null, "name"));
+                    }
+                  }
+                }
+              }
+              if (reader.getLocalName().equals("resource")) {
+                if (!startUrl.endsWith("/")) {
+                  ressourceLoc.add(startUrl + "/" + reader.getAttributeValue(null, "name"));
+                } else {
+                  ressourceLoc.add(startUrl + reader.getAttributeValue(null, "name"));
+                }
+              }
+            }
+          }
+          if (event == XMLStreamConstants.ATTRIBUTE) {
+            // System.out.println("localName : "+reader.getLocalName());
+          }
+        }
+      } catch (XMLStreamException e) {
+        e.printStackTrace();
+      }
+    }
+  }
+
+  /**
+   * extrahiert ebenso wie extractDocLocations(String startUri) Pfade, tut dies
+   * aber local und nicht über HTTP
+   * 
+   * @return
+   */
+  public List<String> extractPathLocally(String startUrl) {
+    List<String> pathList = new ArrayList<String>();
+
+    // home verzeichnis pfad über system variable
+    // String loc = System.getenv("HOME")+"/wsp/configs";
+    // out.println("hom variable + conf datei : "+loc);
+    File f = new File(startUrl);
+    // out.println("readable : "+Boolean.toString(f.canRead()));
+    // out.println("readable : "+f.isDirectory());
+    if (f.isDirectory()) {
+      File[] filelist = f.listFiles();
+      for (File file : filelist) {
+        if (file.getName().toLowerCase().contains("config")) {
+          if (!startUrl.endsWith("/")) {
+            pathList.add(startUrl + "/" + file.getName());
+          } else {
+            pathList.add(startUrl + file.getName());
+          }
+        }
+      }
+    }
+    return pathList;
+  }
+
+}