# HG changeset patch # User dwinter # Date 1290613927 -3600 # Node ID c6929e63b0b8ac70d97ea3ae25209c25c4bbc27e first import diff -r 000000000000 -r c6929e63b0b8 .classpath --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.classpath Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r c6929e63b0b8 .project --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.project Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,30 @@ + + + eSciDocImport + + + + + + org.eclipse.wst.common.project.facet.core.builder + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.wst.validation.validationbuilder + + + + + + org.eclipse.jem.workbench.JavaEMFNature + org.eclipse.wst.common.modulecore.ModuleCoreNature + org.eclipse.jdt.core.javanature + org.eclipse.wst.common.project.facet.core.nature + + diff -r 000000000000 -r c6929e63b0b8 .settings/org.eclipse.jdt.core.prefs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/org.eclipse.jdt.core.prefs Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,12 @@ +#Thu Aug 12 09:32:38 CEST 2010 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.6 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.6 diff -r 000000000000 -r c6929e63b0b8 .settings/org.eclipse.wst.common.component --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/org.eclipse.wst.common.component Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,6 @@ + + + + + + diff -r 000000000000 -r c6929e63b0b8 
.settings/org.eclipse.wst.common.project.facet.core.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/org.eclipse.wst.common.project.facet.core.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,7 @@ + + + + + + + diff -r 000000000000 -r c6929e63b0b8 libs/commons-codec-1.3.jar Binary file libs/commons-codec-1.3.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/commons-logging-1.1.jar Binary file libs/commons-logging-1.1.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/httpclient-4.0.1.jar Binary file libs/httpclient-4.0.1.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/httpcore-4.0.1.jar Binary file libs/httpcore-4.0.1.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/jaxen-1.1.1.jar Binary file libs/jaxen-1.1.1.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/jdom-1.0.jar Binary file libs/jdom-1.0.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/junit-4.6.jar Binary file libs/junit-4.6.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/log4j-1.2.15.jar Binary file libs/log4j-1.2.15.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/org.restlet.ext.servlet.jar Binary file libs/org.restlet.ext.servlet.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/ws-commons-util-1.0.2.jar Binary file libs/ws-commons-util-1.0.2.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/xmlrpc-client-3.1.2.jar Binary file libs/xmlrpc-client-3.1.2.jar has changed diff -r 000000000000 -r c6929e63b0b8 libs/xmlrpc-common-3.1.2.jar Binary file libs/xmlrpc-common-3.1.2.jar has changed diff -r 000000000000 -r c6929e63b0b8 log4uconf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/log4uconf.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 000000000000 -r c6929e63b0b8 src/META-INF/MANIFEST.MF --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/META-INF/MANIFEST.MF Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,3 @@ +Manifest-Version: 1.0 +Class-Path: + diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocBasicHandler.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,682 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URL; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.List; +import java.util.StringTokenizer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction; + + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpDelete; +import org.apache.http.client.methods.HttpEntityEnclosingRequestBase; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.client.methods.HttpRequestBase; +import org.apache.http.client.params.ClientPNames; +import org.apache.http.client.params.CookiePolicy; +import org.apache.http.entity.InputStreamEntity; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.impl.conn.SingleClientConnManager; +import org.apache.http.protocol.BasicHttpContext; +import org.apache.http.protocol.HttpContext; +import org.apache.log4j.Logger; +import org.jdom.Attribute; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import 
org.jdom.Text; +import org.jdom.input.SAXBuilder; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + + + + + +/** + * Handler to create a connection with an eScidoc server + * @author dwinter + * + */ +public class EScidocBasicHandler { + + private Logger logger; + private String eScidocServer; + private int eScidocPort; + private String user; + private String password; + public String eScidocUrl; + private HttpClient httpclient=null; + + /** + * @param eScidocServer + * @param eScidocPort + * @param user + * @param password + */ + public EScidocBasicHandler(String eScidocServer, int eScidocPort,String user, String password){ + this.eScidocServer=eScidocServer; + this.eScidocPort=eScidocPort; + this.user=user; + this.password=password; + this.eScidocUrl="http://"+eScidocServer+":"+String.valueOf(eScidocPort); + + logger = Logger.getRootLogger(); + + } + + + + /** + * Logs you into escidoc and sets the httpclient field to the current client for this session + * @return gives you the httpclient for further usage. 
+ * @throws HttpException + * @throws ClientProtocolException + * @throws IOException + */ + public HttpClient login() throws IOException { + httpclient = new DefaultHttpClient(); + + HttpContext localContext = new BasicHttpContext(); + + httpclient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BROWSER_COMPATIBILITY); + + HttpPost httppost = new HttpPost(eScidocUrl+"/aa/login?target=/"); + + logger.debug("executing request"); + + HttpResponse response = httpclient.execute(httppost); + //HttpEntity entity = httppost.getRes + + System.out.println("----------------------------------------"); + System.out.println(response.getStatusLine()); + + HttpEntity entity = response.getEntity(); + if (entity != null) { entity.consumeContent(); + } + + HttpGet httpget = new HttpGet(eScidocUrl+"/aa/j_spring_security_check?j_username="+user+"&j_password="+password); + + response = httpclient.execute(httpget); + //entity = response.getEntity(); + + System.out.println("----------------------------------------"); + System.out.println(response.getStatusLine()); + + entity = response.getEntity(); + if (entity != null) { entity.consumeContent(); + } + + //entity.consumeContent(); + return httpclient; + } + + /** + * Sends a PUT request to the escidoc client. performs a login if not done before. + * @param command + * @param body + * @return + * @throws ClientProtocolException + * @throws IOException + */ + public HttpResponse eScidocPut(String command, InputStream body) throws IOException { + HttpPut httpput = new HttpPut(eScidocUrl+command); + return eScidocRequestBase(httpput,command,body); + + } + + /** + * Sends a PUT request to the escidoc client. performs a login if not done before. 
+ * @param command + * @param url + * @return + * @throws ClientProtocolException + * @throws IOException + */ + public HttpResponse eScidocPut(String command, URL url) throws IOException{ + + HttpPut httpput = new HttpPut(eScidocUrl+command); + return eScidocRequestBase(httpput,command,url.openStream()); + } + /** + * Sends a POST request to the escidoc client. performs a login if not done before. + * @param command + * @param body + * @return + * @throws ClientProtocolException + * @throws IOException + */ + public HttpResponse eScidocPost(String command, InputStream body) throws IOException{ + HttpPost httppost = new HttpPost(eScidocUrl+command); + return eScidocRequestBase(httppost,command,body); + } + + /** Sends a POST request to the escidoc client. performs a login if not done before. + * @param command + * @param url + * @return + * @throws ClientProtocolException + * @throws IOException + */ + public HttpResponse eScidocPost(String command, URL url) throws IOException{ + HttpPost httppost = new HttpPost(eScidocUrl+command); + + + return eScidocRequestBase(httppost,command,url.openStream()); + } + /** + * Sends a GET request to the escidoc client. performs a login if not done before. + * @param command + * @return + * @throws ClientProtocolException + * @throws IOException + */ + public HttpResponse eScidocGet(String command) throws IOException{ + HttpGet httpget = new HttpGet(eScidocUrl+command); + return eScidocRequestBase(httpget,command,null); + + } + + /** Send a delete command to the escidoc client. performs a login if necessara + * @param command + * @return + * @throws ClientProtocolException + * @throws IOException + */ + public HttpResponse eScidocDelete(String command) throws IOException{ + HttpDelete httpdelete = new HttpDelete(eScidocUrl+command); + return eScidocRequestBase(httpdelete,command,null); + + } + /** + * Generates and sends a general request to escidoc, used by escidocPUT, POST and GET. 
+ * @param httpBase + * @param command + * @param body + * @return + * @throws ClientProtocolException + * @throws IOException + */ + private HttpResponse eScidocRequestBase(HttpRequestBase httpBase, String command, InputStream body) throws IOException { + + + + if (httpclient==null) + login(); + + + + if (HttpEntityEnclosingRequestBase.class.isInstance(httpBase)){ + + + if (body!=null){ + + + + HttpEntity entity = new InputStreamEntity(body, -1); + ((HttpEntityEnclosingRequestBase)httpBase).setEntity(entity); + } + } + + logger.debug("executing request:"+httpBase.getRequestLine()); + + + HttpResponse status = httpclient.execute(httpBase); + //HttpEntity responseEntity = response.getEntity(); + + logger.debug("----------------------------------------"); + logger.debug(status); + + + return status; + } + + /** + * + * To convert the InputStream to String we use the BufferedReader.readLine() + * method. We iterate until the BufferedReader return null which means + * there's no more data to read. Each line will appended to a StringBuilder + * and returned as String. 
+ + * @param is + * @return + */ + public static String convertStreamToString(InputStream is) { + + BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + StringBuilder sb = new StringBuilder(); + + String line = null; + try { + while ((line = reader.readLine()) != null) { + sb.append(line + "\n"); + } + } catch (IOException e) { + e.printStackTrace(); + } finally { + try { + is.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + return sb.toString(); + } + + /** converts a stream to a string + * @param string + * @return + * @throws UnsupportedEncodingException + */ + public static InputStream convertStringToStream(String string) throws UnsupportedEncodingException{ + return new ByteArrayInputStream(string.getBytes("utf-8")); + } + + /** + * Find the last-modification-date from an escidoc xml-file (item/container/context) + * @param ret + * @return + */ + public static String getDateStamp(String ret) { + Pattern p = Pattern.compile("last-modification-date=\"([^\"]*)\""); + + Matcher m = p.matcher(ret); + + m.find(); + + String txt; + try { + txt = m.group(1); + } catch (IllegalStateException e) { + + e.printStackTrace(); + System.out.println(ret); + throw new IllegalStateException(); + } + return txt; + } + + /** + * Find the content-modell (item) + * @param ret + * @return + * @throws IOException + * @throws JDOMException + * @throws UnsupportedEncodingException + */ + public static String getContentModel(InputStream escidocstream) throws UnsupportedEncodingException, JDOMException, IOException { + SAXBuilder builder = new SAXBuilder(); + Document doc = builder.build(escidocstream); + + Element root= doc.getRootElement(); + XPath xpath = EScidocTools.getESciDocXpath("escidocItem:properties/srel:content-model/@xlink:href"); + + Attribute node = (Attribute) xpath.selectSingleNode(root); + + return node.getValue(); + + } + + + /** + * Find the escidoc:id from an escidoc xml (item/container/context) + * @param ret + * @return + 
*/ + public static String getId(String ret) { + Pattern p = Pattern.compile("xlink:href=\"([^\"]*)\""); + + Matcher m = p.matcher(ret); + + m.find(); + String txt = m.group(1); + String[] splitted = txt.split("/"); + + String id = splitted[splitted.length-1]; + return id; + } + + + /** + * Get the pid from an escidoc xml-file (item/container/context) + * @param retTxt + * @return + * @throws JDOMException + * @throws IOException + */ + public static String getPID(String retTxt) throws JDOMException, IOException { + SAXBuilder builder = new SAXBuilder(); + + + Document doc = builder.build(EScidocBasicHandler.convertStringToStream(retTxt)); + + XPath xpath = XPath.newInstance("//prop:pid"); + xpath.addNamespace("prop", EScidocTools.prop); + + Element node = (Element) xpath.selectSingleNode(doc); + + return node.getTextTrim(); + + } + + + /** + * get the number of the last version from an escidoc xml-file (item/container/context) + * @param retTxt + * @return + * @throws JDOMException + * @throws IOException + */ + public static String getLastVersionNumber(String retTxt) throws JDOMException, IOException { + SAXBuilder builder = new SAXBuilder(); + + Document doc = builder.build(EScidocBasicHandler.convertStringToStream(retTxt)); + + + XPath xpath = XPath.newInstance("//prop:version[@xlink:title='This Version']/version:number"); + xpath.addNamespace("prop", EScidocTools.prop); + xpath.addNamespace("xlink", EScidocTools.xlink); + xpath.addNamespace("version", EScidocTools.version); + + Element node = (Element) xpath.selectSingleNode(doc); + + return node.getText(); + } + + public static Object getXPath(Element node, String string,boolean single) throws JDOMException { + XPath xpath= XPath.newInstance(string); + xpath.addNamespace("dc",EScidocTools.DC); + xpath.addNamespace("escidocComponents",EScidocTools.escidocComponents); + xpath.addNamespace("prop",EScidocTools.prop); + xpath.addNamespace("xlink",EScidocTools.xlink); + 
xpath.addNamespace("mpiwg",EScidocTools.MPIWG); + xpath.addNamespace("version",EScidocTools.version); + + if (single) + return xpath.selectSingleNode(node); + else + return xpath.selectNodes(node); + } + + + + public Long writeItem(eSciDocXmlObject escidocItem) { + // TODO Auto-generated method stub + return null; + } + + + + public Boolean createItem(eSciDocXmlObject escidocItem) { + String cmd="/ir/item"; + + try { + String retStr = escidocItem.printXML(); + String newObj = createObject(cmd, retStr); + return escidocItem.upDateFromXML(newObj); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return false; + } + + + } + + public String createObject (String command,String xml) throws Exception { + + + InputStream stream = new ByteArrayInputStream(xml.getBytes("utf-8")); + + + + //DefaultHttpClient httpclient = login(); + + + + HttpResponse eScidocPut = eScidocPut(command, stream); + + + //HttpEntity responseEntity = eScidocPut.getEntity(); + + System.out.println("----------------------------------------"); + System.out.println(eScidocPut.getStatusLine()); + + int code = eScidocPut.getStatusLine().getStatusCode(); + + InputStream st = eScidocPut.getEntity().getContent(); + //System.out.println(EScidocBasicHandler.convertStreamToString(st)); + String xmlret = EScidocBasicHandler.convertStreamToString(st); + if (code !=200){ + logger.error(xmlret); + throw (new Exception("CAN not DO error:"+code)); + + } + return xmlret; + + } + + + + public String getIDfromPID(String pid, String context) throws ClientProtocolException, IOException, IllegalStateException, JDOMException { + + + String filter = ""; + + filter += pid; + filter += ""; + + String command = context + + "/resources/members/filter"; + HttpResponse result =eScidocPost(command, + new ByteArrayInputStream(filter.getBytes())); + + Document dom = new SAXBuilder().build(result.getEntity().getContent()); + + XPath xp = 
EScidocTools.getESciDocXpath("//escidocItem:item/@xlink:href"); + + Attribute attr = (Attribute)xp.selectSingleNode(dom); + + if (attr!=null){ + return attr.getValue(); + } + + return null; + //return convertStreamToString(result.getEntity().getContent()); + + + } + + + + public HttpResponse submitAnObject(eSciDocXmlObject obj, String comment) throws ClientProtocolException, IOException, JDOMException { + try { + addVersionPid(obj); + } catch (Exception e) { + // TODO: handle exception + } + + String retTxt = obj.printXML(); + String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); + + String param = ""; + param+=""+comment+""; + param+=""; + + String command=obj.getESciDocId()+"/submit"; + HttpResponse result = eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); + + return result; + + } + + public List getObjectListFromFilterResult(String command, String objectXpath) throws IOException, IllegalStateException, JDOMException { + //String filter = ""; + // + //String command = context + // + "/resources/members/filter"; + //HttpResponse result =eScidocPost(command, + //new ByteArrayInputStream(filter.getBytes())); + + //String command = context+"/resources/members"; + + HttpResponse result =eScidocGet(command); + Document dom = new SAXBuilder().build(result.getEntity().getContent()); + XPath xp = EScidocTools.getESciDocXpath(objectXpath); + + List attr = (List)xp.selectNodes(dom); + ArrayList ret = new ArrayList(); + for (Element el: attr){ + ret.add(new eSciDocXmlObject(el)); + } + return ret; + } + + public boolean addVersionPid(eSciDocXmlObject obj) throws ClientProtocolException, IOException, JDOMException{ + //HttpResponse ret = eScidocGet(href); + //String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + + String retTxt = obj.printXML(); + String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); + String pid=EScidocBasicHandler.getPID(retTxt); + String versionNumber = 
EScidocBasicHandler.getLastVersionNumber(retTxt); + + String vpid=pid+":"+versionNumber; + + String param = ""; + param+=""+vpid+""; + param+=""; + String command=obj.getESciDocId()+"/assign-version-pid"; + + HttpResponse result = eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); + + int code = result.getStatusLine().getStatusCode(); + result.getEntity().consumeContent(); + if (code!=200) + return false; + return true; + + + } + + + + public HttpResponse releaseAnObject(eSciDocXmlObject obj, String comment) throws IOException, JDOMException { + //HttpResponse ret = getEsciDocHandler().eScidocGet(href); + //String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + + String retTxt = obj.printXML(); + String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); + + String param = ""; + param+=""+comment+""; + param+=""; + + String command=obj.getESciDocId()+"/release"; + HttpResponse result = eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); + + return result; + + + + + } + + + + public boolean upDateObject(eSciDocXmlObject obj) throws Exception { + HttpResponse result = eScidocGet(obj.getESciDocId()); + if (result.getStatusLine().getStatusCode()!=200){ + logger.debug(result.getEntity().getContent()); + return false; + } + + String xml = convertStreamToString(result.getEntity().getContent()); + obj.upDateFromXML(xml); + return true; + } + + + + public boolean alreadyExists(String indexField, String testString, String context) throws Exception { + + String[] ct = context.split("/"); // gebraucht wird hier nur die id, dh ohne /ir/... 
+ + String contextId=ct[ct.length-1]; + + + String searchString = String.format("\"%s\"=\"%s\"",indexField,testString); + searchString += " and "+String.format("\"%s\"=\"%s\"","/properties/context/id",contextId); + + searchString = URLEncoder.encode(searchString,"utf-8"); + HttpResponse ret = eScidocGet("/ir/items?operation=searchRetrieve&version=1.1&query="+searchString); + + if (ret.getStatusLine().getStatusCode()!=200) + { + logger.debug("alreadyExists: error searchstring:"+searchString); + HttpEntity ent = ret.getEntity(); + if (ent!=null) + ent.consumeContent(); + throw new Exception(); + } + Document doc = new SAXBuilder().build(ret.getEntity().getContent()); + + XPath xp = EScidocTools.getESciDocXpath("//zs:numberOfRecords/text()"); + String hitsStr = ((Text)xp.selectSingleNode(doc)).getText(); + Integer hits = Integer.valueOf(hitsStr); + if (hits>0) + return true; + return false; + } + + + + public ArrayList getAllLinksOfContext(String string, String context) throws IOException, IllegalStateException, JDOMException { + + HttpResponse result = eScidocGet(context+"/resources/members"); + Document doc = new SAXBuilder().build(result.getEntity().getContent()); + XPath xp = EScidocTools.getESciDocXpath("//escidocItem:item"); + XPath id = EScidocTools.getESciDocXpath("./@xlink:href"); + + XPath url= EScidocTools.getESciDocXpath(".//escidocComponents:component[escidocComponents:properties/prop:content-category[text()='"+string+"']]/escidocComponents:content/@xlink:href"); + + ArrayList ret = new ArrayList(); + List items = xp.selectNodes(doc); + + File fl = new File("/tmp/linksofcontext.out"); + FileWriter fw = new FileWriter(fl); + + for (Element item: items){ + + Attribute idAttribute = (Attribute)id.selectSingleNode(item); + String idStr = idAttribute.getValue(); + + Attribute urlAttribute = (Attribute)url.selectSingleNode(item); + String urlStr = urlAttribute.getValue(); + ret.add(idStr+","+urlStr); + logger.debug("getALLLinksOfContex:"+idStr+","+urlStr); + 
fw.write(idStr+","+urlStr+"\n"); + } + fw.close(); + return ret; + } +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/EScidocTools.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,82 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.jdom.JDOMException; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class EScidocTools { + public static final String DC = "http://purl.org/dc/elements/1.1/"; + private static final String OAI = "http://www.openarchives.org/OAI/2.0/oai_dc/"; + static public String FOXML = "info:fedora/fedora-system:def/foxml#"; + static public String VLP = "http://www.mpiwg-berlin.mpg.de/ns/vlp"; + static public String MPIWG = "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"; + static public String RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + static public String FEDORA = "info:fedora/fedora-system:def/relations-external#"; + static public String escidocItem="http://www.escidoc.de/schemas/item/0.9"; + static public String prop="http://escidoc.de/core/01/properties/"; + static public String escidocComponents="http://www.escidoc.de/schemas/components/0.9"; + static public String xlink="http://www.w3.org/1999/xlink"; + static public String metadataRecords="http://www.escidoc.de/schemas/metadatarecords/0.5"; + public static final String container="http://www.escidoc.de/schemas/container/0.8"; + public static final String item="http://www.escidoc.de/schemas/item/0.7"; + public static final String srw="http://www.loc.gov/zing/srw/"; + public static final String version = "http://escidoc.de/core/01/properties/version/"; + public static final String 
ECHO="http://www.echo.eu/rdf#"; + public static final String cmm="http://www.escidoc.de/schemas/contentmodel/0.1"; + public static final String zs="http://www.loc.gov/zing/srw/"; + public static final String srel="http://escidoc.de/core/01/structural-relations/"; + public static final String structMap="http://www.escidoc.de/schemas/structmap/0.4"; + + public EScidocTools(EScidocBasicHandler connector) { + // TODO Auto-generated constructor stub + } + + + + + + public static XPath getESciDocXpath(String xpath) throws JDOMException{ + XPath xpathResources = XPath.newInstance(xpath); + + xpathResources.addNamespace("prop",prop); + xpathResources.addNamespace("mpiwg",MPIWG); + + xpathResources.addNamespace("rdf",RDF); + xpathResources.addNamespace("echonavigation",ECHO); + + xpathResources.addNamespace("dc",DC); + xpathResources.addNamespace("oai",OAI); + xpathResources.addNamespace("foxml",FOXML); + xpathResources.addNamespace("vlp",VLP); + + + xpathResources.addNamespace("fedora",FEDORA); + xpathResources.addNamespace("escidocItem",escidocItem); + xpathResources.addNamespace("prop",prop); + xpathResources.addNamespace("escidocComponents",escidocComponents); + xpathResources.addNamespace("xlink",xlink); + xpathResources.addNamespace("metadata-records",metadataRecords); + xpathResources.addNamespace("container",container); + xpathResources.addNamespace("item",item); + xpathResources.addNamespace("srw",srw); + xpathResources.addNamespace("version",version); + xpathResources.addNamespace("echo",ECHO); + xpathResources.addNamespace("escidocContentModel",cmm); + xpathResources.addNamespace("zs",zs); + xpathResources.addNamespace("srel",srel); + xpathResources.addNamespace("struct-map",structMap); + return xpathResources; + } + + + + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/FoxmlNameSpaceContext.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/FoxmlNameSpaceContext.java Wed Nov 24 16:52:07 2010 
+0100 @@ -0,0 +1,61 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; +import java.util.Iterator; + +import javax.xml.XMLConstants; +import javax.xml.namespace.NamespaceContext; + + +public class FoxmlNameSpaceContext implements NamespaceContext { + public static final String DC = "http://purl.org/dc/elements/1.1/"; + private static final String OAI = "http://www.openarchives.org/OAI/2.0/oai_dc/"; + static public String FOXML = "info:fedora/fedora-system:def/foxml#"; + static public String VLP = "http://www.mpiwg-berlin.mpg.de/ns/vlp"; + static public String MPIWG = "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"; + static public String RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + static public String FEDORA = "info:fedora/fedora-system:def/relations-external#"; + + public String getNamespaceURI(String prefix) { + if (prefix.equals("foxml")) + return FOXML; + else if (prefix.equals("oai_dc")) + return OAI; + else if (prefix.equals("dc")) + return DC; + else if (prefix.equals("vlp")) + return VLP; + else if (prefix.equals("mpiwg")) + return MPIWG; + else if (prefix.equals("rdf")) + return RDF; + else if (prefix.equals("fedora")) + return FEDORA; + + else + return XMLConstants.NULL_NS_URI; + } + + public String getPrefix(String namespaceURI) { + if (namespaceURI.equals(FOXML)) + return "foxml"; + else if (namespaceURI.equals(OAI)) + return "oai_dc"; + else if (namespaceURI.equals(DC)) + return "dc"; + else if (namespaceURI.equals(VLP)) + return "vlp"; + else if (namespaceURI.equals(MPIWG)) + return "mpiwg"; + else if (namespaceURI.equals(RDF)) + return "rdf"; + else if (namespaceURI.equals(FEDORA)) + return "fedora"; + else + return null; + } + + public Iterator getPrefixes(String namespaceURI) { + // TODO Auto-generated method stub + return null; + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/Html2Text.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Html2Text.java Wed Nov 24 16:52:07 2010 
+0100 @@ -0,0 +1,34 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; + +import java.io.IOException; +import java.io.Reader; + +import javax.swing.text.html.HTMLEditorKit; +import javax.swing.text.html.parser.ParserDelegator; + +//import javax.swing.text.html.*; +//import javax.swing.text.html.parser.*; + + +public class Html2Text extends HTMLEditorKit.ParserCallback { + StringBuffer s; + + public Html2Text() {} + + public void parse(Reader in) throws IOException { + s = new StringBuffer(); + ParserDelegator delegator = new ParserDelegator(); + // the third parameter is TRUE to ignore charset directive + delegator.parse(in, this, Boolean.TRUE); + } + + public void handleText(char[] text, int pos) { + s.append(text); + } + + public String getText() { + return s.toString(); + } + + } + diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/IngestECHO.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/IngestECHO.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,726 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; + +//todo: create context for echo and contentmodell +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.PrintStream; +import java.io.StringReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathFactory; + +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.apache.xmlrpc.XmlRpcException; +import org.apache.xmlrpc.client.XmlRpcClient; +import 
org.apache.xmlrpc.client.XmlRpcClientConfigImpl; +import org.jdom.JDOMException; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; +import org.xml.sax.SAXParseException; + +import sun.misc.Regexp; + +//import fedora.client.FedoraClient; +//import fedora.server.access.FedoraAPIA; +//import fedora.server.management.FedoraAPIM; +//import fedora.server.types.gen.ComparisonOperator; +//import fedora.server.types.gen.Condition; +//import fedora.server.types.gen.FieldSearchQuery; +//import fedora.server.types.gen.FieldSearchResult; +//import fedora.server.types.gen.ListSession; +//import fedora.server.types.gen.MIMETypedStream; +//import fedora.server.types.gen.ObjectFields; + +public class IngestECHO extends Ingestor { + + protected String ECHORESOURCE_TEMPLATE_XML; + protected String ECHOCONTAINER_TEMPLATE_XML; + private String SERVLETURL; + protected String ECHOURL; + protected String ECHO_CONTAINER_ID; + protected String ECHO_ROOT_ID; + protected String MAIN_CONTEXT; + private HashMap pids; + + protected static String ESCIDOC_SERVER_URL = "euler.mpiwg-berlin.mpg.de"; + protected static String ZOPEPROVIDER = "http://127.0.0.1:18080"; + + private static int PORT = 8080; + + IngestECHO(String user, String password){ + + super(ESCIDOC_SERVER_URL, PORT, ZOPEPROVIDER, user, password); + ECHORESOURCE_TEMPLATE_XML = "ECHOResourceTemplate.xml"; + + SERVLETURL= "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn="; + + ECHOURL = "http://echo.mpiwg-berlin.mpg.de"; + + //ZOPEPROVIDER = "http://127.0.0.1:18080"; + + ECHO_CONTAINER_ID = "escidoc:3006"; // enthaelt alle ECHO + // objecte + ECHO_ROOT_ID = "escidoc:3005"; // enthaelt alle Objekte die + // keiner ECHO collection + // angehoeren + + MAIN_CONTEXT = "escidoc:3002"; + + HashMap pids = null; + + + } + + void ingestECHOCollections() throws XmlRpcException, IOException { + ArrayList urls = getAllCollections(); + HashMap success = new HashMap(); + HashMap nosuccess 
= new HashMap(); + + for (String url : urls) { + + try { + String id = ingestECHOCollection(url); + success.put(id, url); + } catch (Exception e) { + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PrintStream s = new PrintStream(out); + e.printStackTrace(s); + + nosuccess.put(url, out.toString()); + + e.printStackTrace(); + } + } + System.out.println("SUCCESSFULL INGEST"); + for (String id : success.keySet()) + System.out.println("ID:" + id + " URL:" + success.get(id)); + + System.out.println("ERRORS:"); + for (String id : nosuccess.keySet()) { + System.out.println("URL:" + id); + System.out.println("Message:" + nosuccess.get(id)); + } + + } + + void organizeECHOCollections() throws XmlRpcException, IOException, + JDOMException { + ArrayList urls = getAllCollections(); + HashMap success = new HashMap(); + HashMap nosuccess = new HashMap(); + + for (String url : urls) { + + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + config.setServerURL(new URL(url)); + client.setConfig(config); + + Object[] params = new Object[] {}; + + if (pids == null) { + pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID); + } + + try { + String parentPid; + String pid = (String) client.execute("getPID", params); + String contid = pids.get("mpiwg:" + pid); + addECHOObjectToCollection(client, contid); + success.put(pid, url); + } catch (Exception e) { + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PrintStream s = new PrintStream(out); + e.printStackTrace(s); + + nosuccess.put(url, out.toString()); + + e.printStackTrace(); + } + } + System.out.println("SUCCESSFULL ORGANIZED"); + for (String id : success.keySet()) + System.out.println("ID:" + id + " URL:" + success.get(id)); + + System.out.println("ERRORS:"); + for (String id : nosuccess.keySet()) { + System.out.println("URL:" + id); + System.out.println("Message:" + nosuccess.get(id)); + } + + } + + void organizeECHORessources() throws 
XmlRpcException, IOException, + JDOMException { + ArrayList urls = getAllResources(); + HashMap success = new HashMap(); + HashMap nosuccess = new HashMap(); + + for (String url : urls) { + + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + config.setServerURL(new URL(url)); + client.setConfig(config); + + Object[] params = new Object[] {}; + + if (pids == null) { + pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID); + } + + try { + String parentPid; + String pid = (String) client.execute("getPID", params); + String contid = getIDfromPID("mpiwg:" + pid); + addECHOObjectToCollection(client, contid); + success.put(pid, url); + } catch (Exception e) { + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PrintStream s = new PrintStream(out); + e.printStackTrace(s); + + nosuccess.put(url, out.toString()); + + e.printStackTrace(); + } + } + System.out.println("SUCCESSFULL ORGANIZED"); + for (String id : success.keySet()) + System.out.println("ID:" + id + " URL:" + success.get(id)); + + System.out.println("ERRORS:"); + for (String id : nosuccess.keySet()) { + System.out.println("URL:" + id); + System.out.println("Message:" + nosuccess.get(id)); + } + + } + + + + private String getIDfromPID(String pid) throws ClientProtocolException, + IOException { + InputStream res = getXMLfromPID(pid,MAIN_CONTEXT); + return EScidocBasicHandler.getId(EScidocBasicHandler + .convertStreamToString(res)); + } + + /** + * FŸgt die ECHO Collection unter der URL in eScidoc ein. Der Link auf die + * Web-Seite wird in einem eigenen item hinterlegt, dass in Collection + * eingefŸgt wird. 
+ * + * @param url + * @throws Exception + */ + private String ingestECHOCollection(String url) throws Exception { + + // get a PID for the Collection + System.out.println("Processing:" + url); + + HashMap dcs = new HashMap(); // Store + // for + // the + // metadata + + // Verbinde dich mit der Collection Ÿber XML-rpc + + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + config.setServerURL(new URL(url)); + client.setConfig(config); + + String pid = getOrCreatePID(client); + + if (pidAlreadyExists("mpiwg:"+pid)) + { + System.out.println("PID:"+pid); + String contid=getIDfromPID("mpiwg:"+pid); + System.out.println("------- belongsTo:"+contid); + return contid; + } + Object[] params = new Object[] {}; + + eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid, + ECHOCONTAINER_TEMPLATE_XML); + + String result = (String) client.execute("getDescription", params); + + String x = new String(result.getBytes("UTF-8"), ("UTF-8")); + // System.out.println("DESCR"+x); + dcs.put("description", x); + String title = (String) client.execute("getTitle", params); + dcs.put("title", title); + obj.insertDC(dcs); + obj.addOrigUrlToMPIWGMetaData(url); + + // obj.setRelationship("info:fedora/echo:col1"); + String xml = obj.printXML(); + // System.out.println(xml); + String ret = ingest("/ir/container", xml); + String xr = ingestCollectionWebSite(title, url); + // System.out.println(xr); + String objid = EScidocBasicHandler.getId(xr); + String dateStamp = EScidocBasicHandler.getDateStamp(ret); + String addTxt = ""; + addTxt += "" + objid + ""; + addTxt += ""; + + String contid = EScidocBasicHandler.getId(ret); + + ByteArrayInputStream stream = new ByteArrayInputStream(addTxt + .getBytes("utf-8")); + + eSciDocHandler.eScidocPost("/ir/container/" + contid + "/members/add", + stream); + // System.out.println(response.getStatusLine()); + // 
System.out.println(EScidocBasicHandler.convertStreamToString(response.getEntity().getContent())); + System.out.println("Processed:" + url + "------>" + contid); + + addToCollection(ECHO_CONTAINER_ID, contid); + + params = new Object[] { pid }; + client.execute("setPID", params); + + System.out.println(ret); + + addECHOObjectToCollection(client, contid); + return contid; + + } + + public ArrayList findMissingItems() throws XmlRpcException, IOException{ + return findMissingItemsFromECHOUrls(getAllResources()); + } + + public ArrayList findMissingCollections() throws XmlRpcException, IOException{ + return findMissingItemsFromECHOUrls(getAllCollections()); + } + + public ArrayList findMissingItemsFromECHOUrls(List urls) throws XmlRpcException, IOException{ + //ArrayList urls = getAllCollections(); + System.out.println("GOT the collections"); + ArrayList ret = new ArrayList(); + for (String url : urls) { + System.out.println("checking:"+url); + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + + config.setServerURL(new URL(url)); + client.setConfig(config); + String pid; + try { + Object[] parameters = new Object[] {}; + + pid = (String) client.execute("getPID", parameters); + } catch (Exception e) { + pid = null; + } + + if (pid == null){ + ret.add(url); + System.out.println(" -- no pid"); + } else { + String id; + try { + id = getIDfromPID("mpiwg:"+pid); + } catch (Exception e) { + id = "NO"; + ret.add(url); + } + + + System.out.println(" -- id:"+id); + } + + + + } + return ret; + } + private String getOrCreatePID(XmlRpcClient client) throws XmlRpcException, + MalformedURLException { + Object[] parameters = new Object[] {}; + + String pid = null; + + // Hole pid aus ECHO + try { + pid = (String) client.execute("getPID", parameters); + } catch (Exception e) { + pid = null; + } + // Falls dort noch keine ist, erzeuge ein neue + if (pid == null) + pid = getID(); + else + System.out.println("PID from ECHO:" + pid); 
+ + return pid; + } + + private void addECHOObjectToCollection(XmlRpcClient client, String contid) + throws ClientProtocolException, IOException, JDOMException { + Object[] params; + params = new Object[] {}; + + if (pids == null) { + pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID); + } + + String parentId; + String parentPid; + try { + parentPid = (String) client.execute("getParentPID", params); + parentId = pids.get("mpiwg:" + parentPid); + } catch (Exception e) { + parentId = ECHO_ROOT_ID; + } + addToCollection(parentId, contid); + + } + + private String ingestCollectionWebSite(String title, String url) + throws Exception { + String pid = getID(); + eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid, + "ECHOCollectionWebRepresentationTemplate.xml"); + + HashMap dcs = new HashMap(); + + obj.addWebUrl(url); + // obj.setRelationship("info:fedora/echo:col1"); + + dcs.put("title", title); // ersatzweise den titel aus der echo + // collection + obj.insertDC(dcs); + + String xml = obj.printXML(); + // System.out.println(xml); + String res = ingest("/ir/item", xml); + return res; + } + + public void ingestECHOResources() throws IOException { + ingestECHOResources(null); + + } + public void ingestECHOResources(Pattern match) throws IOException { + ArrayList urls = getAllResources(); + HashMap success = new HashMap(); + HashMap nosuccess = new HashMap(); + + for (String url : urls) { + + try { + Boolean ingest=false; + + if (match == null) + ingest=true; + else { + Matcher m = match.matcher(url); + if (m.matches()) + ingest=true; + + } + if (ingest){ + String id = ingestECHOResource(url); + success.put(id, url); + } + } catch (Exception e) { + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PrintStream s = new PrintStream(out); + e.printStackTrace(s); + + nosuccess.put(url, out.toString()); + + e.printStackTrace(); + } + } + System.out.println("SUCCESSFULL INGEST"); + for (String id : success.keySet()) + System.out.println("ID:" + id + " 
URL:" + success.get(id)); + + System.out.println("ERRORS:"); + for (String id : nosuccess.keySet()) { + System.out.println("URL:" + id); + System.out.println("Message:" + nosuccess.get(id)); + } + + } + + protected ArrayList getAllResources() throws IOException { + URL echoUrl = new URL(ECHOURL + "/getResourcesXML"); + Pattern p = Pattern.compile("echoLink=\"([^\"]*)\""); + BufferedReader in = new BufferedReader(new InputStreamReader(echoUrl + .openStream())); + + ArrayList ret = new ArrayList(); + String inputLine; + Matcher m; + while ((inputLine = in.readLine()) != null) { + m = p.matcher(inputLine); + String lit; + if (m.find()) { + + lit = m.group(1); + ret.add(lit); + } + } + + in.close(); + return ret; + + } + + protected String ingestECHOResource(String url) throws Exception { + return ingestECHOResource(url, false); + } + + protected String ingestECHOResource(String url,boolean withfullText) throws Exception { + + System.out.println("Starting:" + url); + + HashMap dcs = new HashMap(); + + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + config.setServerURL(new URL(url)); + client.setConfig(config); + + String pid = getOrCreatePID(client); + + if (pidAlreadyExists("mpiwg:"+pid)) + { + System.out.println("PID:"+pid); + String contid=getIDfromPID("mpiwg:"+pid); + System.out.println("------- belongsTo:"+contid); + return contid; + } + eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:" + pid, + ECHORESOURCE_TEMPLATE_XML); + Object[] params = new Object[] {}; + + String title = (String) client.execute("getTitle", params); + String ml = (String) client.execute("getMetaDataLink", params); + if (withfullText){ + String fulltextURL = url+"/getFullTextXML"; + obj.addFullText(fulltextURL); + + } + ml = correctML(ml); + + obj.addWebUrl(url); + obj.addOrigUrlToMPIWGMetaData(url); + + // obj.setRelationship("info:fedora/echo:col1"); + + config.setServerURL(new URL(ZOPEPROVIDER + "/metadataMain")); + 
client.setConfig(config); + params = new Object[] { ml }; + + try { + String result = (String) client.execute("getDCFormatted", params); + System.out.println("dC:"+result); + DocumentBuilderFactory factory = DocumentBuilderFactory + .newInstance(); + factory.setNamespaceAware(true); + DocumentBuilder db = factory.newDocumentBuilder(); + + InputSource resultStream = new InputSource(new StringReader(result)); + Document dc = db.parse(resultStream); + obj.insertDC(dc); + + Document indexmeta = db.parse(ml); + + XPath xpath = XPathFactory.newInstance().newXPath(); + xpath.setNamespaceContext(new EScidocNameSpaceContext()); + + NodeList test = (NodeList) xpath.evaluate("//meta", indexmeta, + XPathConstants.NODESET); + if (test.getLength() != 1) + { + test = (NodeList) xpath.evaluate("//mpiwg:meta", indexmeta, + XPathConstants.NODESET); + + if (test.getLength() !=1) + throw new Exception(); + } + obj.insertMeta(test.item(0)); + + obj.addIndexMetaUrl(ml); + + } catch (XmlRpcException e) { + System.err.println("Ressource:" + url); + System.err.println("METADATA CANNOT BE PARSED:" + ml); + HashMap dc = new HashMap(); + dc.put("title", title); // ersatzweise den titel aus der echo + // collection + obj.insertDC(dc); + } catch (SAXParseException e) { + System.err.println("METADATA RESULT CANNOT BE PARSED:"); + HashMap dc = new HashMap(); + dc.put("title", title); // ersatzweise den titel aus der echo + // collection + obj.insertDC(dc); + } + + String xml = obj.printXML(); + System.out.println(xml); + return "XXX"; + String result = ingest("/ir/item", xml); +// String contid = EScidocBasicHandler.getId(result); +// //String contid="NNNN"; +// System.out.println("------->" + contid); +// +// params = new Object[] { pid }; +// config.setServerURL(new URL(url)); +// client.setConfig(config); +// +// client.execute("setPID", params); +// addToCollection(ECHO_CONTAINER_ID, contid); +// +// addECHOObjectToCollection(client, contid); +// return contid; + + } + + private boolean 
pidAlreadyExists(String pid) { + String id; + try{ + id = getIDfromPID(pid); + } catch (Exception e){ + return false; + } + if (!id.equals("")) + return true; + return false; + } + + private String correctML(String ml) { + Pattern p = Pattern.compile("experimental/(.*)"); + Matcher m = p.matcher(ml); + String pf; + if (m.find()) + pf = "experimental/" + m.group(1); + else { + p = Pattern.compile("permanent/(.*)"); + m = p.matcher(ml); + if (m.find()) + pf = "permanent/" + m.group(1); + else + return ml; + } + return SERVLETURL + pf; + } + + protected ArrayList getAllCollections() throws XmlRpcException, + IOException { + System.out.println("ECHO:"+ECHOURL); + URL echoUrl = new URL(ECHOURL + "/getCollectionsXML"); + Pattern p = Pattern.compile("echoLink=\"(.*)\""); + BufferedReader in = new BufferedReader(new InputStreamReader(echoUrl + .openStream())); + + ArrayList ret = new ArrayList(); + String inputLine; + Matcher m; + while ((inputLine = in.readLine()) != null) { + m = p.matcher(inputLine); + String lit; + if (m.find()) { + + lit = m.group(1); + ret.add(lit); + } + } + + in.close(); + return ret; + } + + private void submitAndReleaseAnObject(String href) throws ClientProtocolException, + IOException, JDOMException { + + addVersionPid(href); + HttpResponse res = submitAnObject(href, "submit"); + System.out.println(EScidocBasicHandler.convertStreamToString(res + .getEntity().getContent())); + res = releaseAnObject(href, "first release"); + System.out.println(EScidocBasicHandler.convertStreamToString(res + .getEntity().getContent())); + + } + + void releaseECHORessources() throws XmlRpcException, IOException, + JDOMException { + ArrayList urls = getAllResources(); + HashMap success = new HashMap(); + HashMap nosuccess = new HashMap(); + int numOfUrl= urls.size(); + int count = 0; + for (String url : urls) { + + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + config.setServerURL(new URL(url)); + 
client.setConfig(config); + + Object[] params = new Object[] {}; + + if (pids == null) { + pids = getPIDsAndEscidocIdsOfCollections(ECHO_CONTAINER_ID); + } + + try { + String parentPid; + String pid = (String) client.execute("getPID", params); + String contid = getIDfromPID("mpiwg:" + pid); + submitAndReleaseAnObject("/ir/item/"+contid); + success.put(pid, url); + } catch (Exception e) { + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + PrintStream s = new PrintStream(out); + e.printStackTrace(s); + + nosuccess.put(url, out.toString()); + + e.printStackTrace(); + } + count+=1; + System.out.println("DONE:"+count+" of "+numOfUrl); + } + System.out.println("SUCCESSFULL ORGANIZED"); + for (String id : success.keySet()) + System.out.println("ID:" + id + " URL:" + success.get(id)); + + System.out.println("ERRORS:"); + for (String id : nosuccess.keySet()) { + System.out.println("URL:" + id); + System.out.println("Message:" + nosuccess.get(id)); + } + + } +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/IngestECHOCall.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/IngestECHOCall.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,49 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; +import java.util.ArrayList; +import java.util.HashMap; + +import org.apache.http.HttpResponse; + + +public class IngestECHOCall { + + public static void main(String[] args) throws Exception{ + System.out.println("Starting ECHO call"); + String user=args[0]; + String password=args[1]; + + IngestECHO ie = new IngestECHO(user,password); + //ie.ingestECHOCollections(); + //ie.deleteAllContentOfCollectionWithPID("escidoc:3002", "mpiwg:HQ41PZ3H"); + //ingestECHOCollection("http://127.0.0.1:18080/echo_nav/echo_pages/content/religion/bulgaria"); + //deleteECHOObjects(); + //ingestECHOResource("http://127.0.0.1:18080/echo_nav/echo_pages/content/chineseknowledge/china/zhoubisuanjing"); +// PrintStream out = new PrintStream(new 
FileOutputStream("/tmp/out.log")); +// +// System.setErr(out); + //ingestECHOResources(); + // + + //HashMap colls = ie.getPIDsAndEscidocIdsOfCollections(IngestECHO.ECHO_CONTAINER_ID); + //ie.organizeECHOCollections(); + //ArrayList urls = ie.findMissingCollections(); + //System.out.println(urls); + //ie.findMissingItems(); + //ie.ingestECHOResources(); + //ie.organizeECHORessources(); +// ie.addVersionPid("/ir/item/escidoc:26033"); +// HttpResponse res = ie.submitAnObject("/ir/item/escidoc:26033", "submit"); +// System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent())); +// res = ie.releaseAnObject("/ir/item/escidoc:26033", "first release"); +// System.out.println(EScidocBasicHandler.convertStreamToString(res.getEntity().getContent())); +// + //ie.ingestECHOResource("http://echo.mpiwg-berlin.mpg.de/content/jesuit/jesuit_sciences/kircher_lucis_1646"); + ie.releaseECHORessources(); + //ie.deleteAllContentOfCollection(ie.ECHO_CONTAINER_ID); + //String ret= ie.createMPIWGCollection("ECHO-TESTER-ROOT", "dirk", "contains a test from dw's local echo", "escidoc:21400"); + //System.out.println("Calls:"+ret); + + + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/IngestVLP.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/IngestVLP.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,28 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; + +public class IngestVLP extends IngestECHO { + + //protected String ECHOURL = "http://vlp.mpiwg-berlin.mpg.de"; + + + IngestVLP(String user, String password){ + super(user,password); + //ECHOURL = "http://141.14.237.25:18080/vlp/vlp_coll/library/data"; + ECHOURL = "http://vlp.mpiwg-berlin.mpg.de/library/data"; + + ESCIDOC_SERVER_URL = "xserve07.mpiwg-berlin.mpg.de"; + //ZOPEPROVIDER = "http://euler.mpiwg-berlin.mpg.de:28080"; + ECHORESOURCE_TEMPLATE_XML = "VLPResourceTemplate.xml"; + + + ECHOCONTAINER_TEMPLATE_XML = "VLPContainerTemplate.xml"; + + 
ECHO_CONTAINER_ID = "escidoc:13067"; // enthaelt alle VLP + // Literatutre objekte + ECHO_ROOT_ID = "escidoc:13068"; // enthaelt alle Objekte die + // keiner VLP collection + // angehoeren + MAIN_CONTEXT= "escidoc:3002"; + + } +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/IngestVLPCall.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/IngestVLPCall.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,31 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.regex.Pattern; + +import org.apache.xmlrpc.XmlRpcException; +import org.jdom.JDOMException; + +public class IngestVLPCall { + + /** + * @param args + * @throws IOException + * @throws JDOMException + * @throws XmlRpcException + */ + public static void main(String[] args) throws IOException, XmlRpcException, JDOMException { + System.out.println("Starting VLP call2"); + String user=args[0]; + String password=args[1]; + + IngestECHO ie = new IngestVLP(user,password); + ie.ingestECHOResources(Pattern.compile(".*/library/data/.*")); + //ie.releaseECHORessources(); + //ArrayList ret = ie.getAllResources(); + //for (String x : ret) + // System.out.println(x); + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/IngestXMLFullText.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/IngestXMLFullText.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,34 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; + +import java.io.IOException; +import java.util.ArrayList; + +public class IngestXMLFullText extends IngestECHO { + + public IngestXMLFullText(String user, String password) { + super(user,password); + ECHOURL = "http://echo.mpiwg-berlin.mpg.de/content/historymechanics/archimdesecho"; + + ESCIDOC_SERVER_URL = "euler.mpiwg-berlin.mpg.de"; + ZOPEPROVIDER = "http://euler.mpiwg-berlin.mpg.de:28080"; + ECHORESOURCE_TEMPLATE_XML = 
"VLPResourceTemplate.xml"; + + + ECHOCONTAINER_TEMPLATE_XML = "VLPContainerTemplate.xml"; + + ECHO_CONTAINER_ID = "escidoc:13067"; // enthaelt alle VLP + // Literatutre objekte + ECHO_ROOT_ID = "escidoc:13068"; // enthaelt alle Objekte die + // keiner VLP collection + // angehoeren + MAIN_CONTEXT= "escidoc:3002"; + } + + public void ingestXMLText() throws Exception{ + ArrayList resources = getAllResources(); + for (String resource : resources){ + ingestECHOResource(resource, true); + } + + } +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/IngestXMLFullTextCall.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/IngestXMLFullTextCall.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,23 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; + +import java.io.IOException; +import java.util.regex.Pattern; + +import org.apache.xmlrpc.XmlRpcException; +import org.jdom.JDOMException; + +public class IngestXMLFullTextCall { + public static void main(String[] args) throws Exception { + System.out.println("Starting VLP call2"); + String user=args[0]; + String password=args[1]; + + IngestXMLFullText ie = new IngestXMLFullText(user,password); + //ie.ingestECHOResources(Pattern.compile(".*/library/data/.*")); + ie.ingestXMLText(); + //ie.releaseECHORessources(); + //ArrayList ret = ie.getAllResources(); + //for (String x : ret) + // System.out.println(x); + } +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/Ingestor.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/Ingestor.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,521 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import 
java.util.HashMap; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.rpc.ServiceException; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.apache.xmlrpc.XmlRpcException; +import org.apache.xmlrpc.client.XmlRpcClient; +import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; +import org.jdom.Attribute; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.Namespace; +import org.jdom.input.SAXBuilder; +import org.jdom.output.XMLOutputter; +import org.jdom.xpath.XPath; + + + +//import fedora.client.FedoraClient; +//import fedora.server.access.FedoraAPIA; +//import fedora.server.management.FedoraAPIM; + + + +public class Ingestor { + protected EScidocBasicHandler eSciDocHandler = null; + protected String ESCIDOC_SERVER_URL; + protected String ZOPEPROVIDER; + private String USER; + private String PASSWORD; + private int PORT; + + public Ingestor(String SERVER_URL,int ServerPort, String ZOPE, String User, String Password) + { + ESCIDOC_SERVER_URL = SERVER_URL; + ZOPEPROVIDER= ZOPE; + USER = User; + PASSWORD = Password; + PORT = ServerPort; + + } + public String getID() throws MalformedURLException, XmlRpcException { + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + config.setServerURL(new URL(ZOPEPROVIDER+"/idGenerator")); + client.setConfig(config); + Object[] params = new Object[]{}; + return (String) client.execute("generateId", params); + } + + + + protected EScidocBasicHandler getEsciDocHandler(){ + if (eSciDocHandler==null) + { + eSciDocHandler = new EScidocBasicHandler(ESCIDOC_SERVER_URL,PORT,USER,PASSWORD); + } + return eSciDocHandler; + } + + public String ingest (String command,String xml) throws ServiceException, IOException{ + + //File ff = new File ("/tmp/test3.xml"); + + //FileInputStream stream = 
new FileInputStream(ff); + InputStream stream = new ByteArrayInputStream(xml.getBytes("utf-8")); + + + + //DefaultHttpClient httpclient = login(); + + + + HttpResponse eScidocPut = getEsciDocHandler().eScidocPut(command, stream); + + + HttpEntity responseEntity = eScidocPut.getEntity(); + + System.out.println("----------------------------------------"); + System.out.println(eScidocPut.getStatusLine()); + + InputStream st = responseEntity.getContent(); + //System.out.println(EScidocBasicHandler.convertStreamToString(st)); + String xmlret = EScidocBasicHandler.convertStreamToString(st); + + return xmlret; + +} + + public void addToCollection(String collection_url,String newMember) throws ClientProtocolException, IOException{ + HttpResponse ret = getEsciDocHandler().eScidocGet("/ir/container/"+collection_url); + String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); + + + + String param = ""; + param+=""+newMember+""; + param+=""; + + ByteArrayInputStream stream = new ByteArrayInputStream(param.getBytes()); + HttpResponse result = getEsciDocHandler().eScidocPost("/ir/container/"+collection_url+"/members/add",stream); + + System.out.println("----->addResult:"+result.getStatusLine()); + + System.out.println("added "+newMember+" to "+collection_url); + + } + + public List getAllMembers(String href) throws ClientProtocolException, IOException, JDOMException + { + String command=href+"/members/filter"; + //String filter="escidoc:7017"; + String filter=""; + System.out.println(filter); + HttpResponse result = getEsciDocHandler().eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); + + InputStream stream = result.getEntity().getContent(); + //System.out.println(EScidocBasicHandler.convertStreamToString(stream)); + //stream.reset(); + SAXBuilder builder = new SAXBuilder(); + + + Document doc = builder.build(stream); + String pid = null; + XPath xpath = 
XPath.newInstance("//container:container"); + xpath.addNamespace("container", EScidocNameSpaceContext.container); + + List nodes = xpath.selectNodes(doc); + + xpath = XPath.newInstance("//item:item"); + xpath.addNamespace("item", EScidocNameSpaceContext.item); + nodes.addAll(nodes = xpath.selectNodes(doc)); + + List ret = new ArrayList(); + + for (Element el : nodes){ + //XMLOutputter output = new XMLOutputter(); + //output.output(el, System.out); + xpath =XPath.newInstance("@xlink:href"); + xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); + + Attribute idAtrr = (Attribute) xpath.selectSingleNode(el); + + String id = idAtrr.getValue(); + + + + ret.add(id); + + } + + return ret; + + + } + public HashMap getPIDsAndEscidocIdsOfCollections(String collectionContainer) throws ClientProtocolException, IOException, JDOMException{ + + HashMap ret = new HashMap(); + String command="/ir/container/"+collectionContainer+"/members/filter"; + //String filter="escidoc:7017"; + String filter=""; + System.out.println(filter); + HttpResponse result = getEsciDocHandler().eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); + + InputStream stream = result.getEntity().getContent(); + //System.out.println(EScidocBasicHandler.convertStreamToString(stream)); + //stream.reset(); + SAXBuilder builder = new SAXBuilder(); + + + Document doc = builder.build(stream); + String pid = null; + XPath xpath = XPath.newInstance("//container:container"); + xpath.addNamespace("container", EScidocNameSpaceContext.container); + + List nodes = xpath.selectNodes(doc); + for (Element el : nodes){ + //XMLOutputter output = new XMLOutputter(); + //output.output(el, System.out); + xpath =XPath.newInstance(".//container:properties/prop:pid"); + xpath.addNamespace("prop", EScidocNameSpaceContext.prop); + xpath.addNamespace("container", EScidocNameSpaceContext.container); + Element pidEl =(Element) xpath.selectSingleNode(el); + pid =pidEl.getText(); + + Namespace ns = 
Namespace.getNamespace("xlink",EScidocNameSpaceContext.xlink); + + xpath =XPath.newInstance("@xlink:href"); + xpath.addNamespace(ns); + + Attribute idAtrr = (Attribute) xpath.selectSingleNode(el); + List attrs = el.getAttributes(); + String id = idAtrr.getValue(); + + String[] splitted = id.split("/"); + + ret.put(pid, splitted[splitted.length-1]); + + } + + return ret; + + + } + protected InputStream getXMLfromPID(String pid,String context) + throws ClientProtocolException, IOException { + String filter = ""; + + filter += pid; + filter += ""; + + String command = "/ir/context/" + context + + "/resources/members/filter"; + HttpResponse result = getEsciDocHandler().eScidocPost(command, + new ByteArrayInputStream(filter.getBytes())); + + return result.getEntity().getContent(); + +} + public void deleteAllContentOfCollectionWithPID(String collection_Id, String pid) throws ClientProtocolException, IOException, JDOMException + { + InputStream xmlStream = getXMLfromPID(pid, collection_Id); + SAXBuilder builderA = new SAXBuilder(); + Document docA = builderA.build(xmlStream); + + XPath xpathA = XPath.newInstance("//member-list:member-list/container:container/@xlink:href"); + xpathA.addNamespace("member-list", "http://www.escidoc.de/schemas/memberlist/0.7"); + xpathA.addNamespace("container", EScidocNameSpaceContext.container); + xpathA.addNamespace("xlink", EScidocNameSpaceContext.xlink); + + List nodesA = xpathA.selectNodes(docA); + + + + for (Attribute nodeA: nodesA){ + String id = nodeA.getValue(); + String filter=""; + String command=id+"/members/filter"; + HttpResponse result = getEsciDocHandler().eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); + + xmlStream = result.getEntity().getContent(); + + + XMLOutputter xmlout = new XMLOutputter(); + SAXBuilder builder = new SAXBuilder(); + Document doc = builder.build(xmlStream); + + XPath xpath = XPath.newInstance("//item:item"); + xpath.addNamespace("item", EScidocNameSpaceContext.item); + List nodes = 
xpath.selectNodes(doc); + + for (Element node: nodes) + { + xpath = XPath.newInstance("@xlink:href"); + xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); + Attribute attribute = (Attribute) xpath.selectSingleNode(node); + String href = attribute.getValue(); + delete(href); + } + + xpath = XPath.newInstance("//container:container"); + xpath.addNamespace("container", EScidocNameSpaceContext.container); + nodes = xpath.selectNodes(doc); + + for (Element node: nodes) + { + xpath = XPath.newInstance("@xlink:href"); + xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); + Attribute attribute = (Attribute) xpath.selectSingleNode(node); + String href = attribute.getValue(); + removeMembers(href); + delete(href); + } + delete(id); + + } + } + + public void deleteAllContentOfCollection(String collection_Id) throws ClientProtocolException, IOException, JDOMException + { + HashMap pids = getPIDsAndEscidocIdsOfCollections(collection_Id); + for (String id: pids.values()){ + String filter=""; + String command="/ir/container/"+id+"/members/filter"; + HttpResponse result = getEsciDocHandler().eScidocPost(command, new ByteArrayInputStream(filter.getBytes())); + + InputStream xmlStream = result.getEntity().getContent(); + + + XMLOutputter xmlout = new XMLOutputter(); + SAXBuilder builder = new SAXBuilder(); + Document doc = builder.build(xmlStream); + + XPath xpath = XPath.newInstance("//item:item"); + xpath.addNamespace("item", EScidocNameSpaceContext.item); + List nodes = xpath.selectNodes(doc); + + for (Element node: nodes) + { + xpath = XPath.newInstance("@xlink:href"); + xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); + Attribute attribute = (Attribute) xpath.selectSingleNode(node); + String href = attribute.getValue(); + delete(href); + } + + xpath = XPath.newInstance("//container:container"); + xpath.addNamespace("container", EScidocNameSpaceContext.container); + nodes = xpath.selectNodes(doc); + + for (Element node: nodes) + { + xpath = 
XPath.newInstance("@xlink:href"); + xpath.addNamespace("xlink", EScidocNameSpaceContext.xlink); + Attribute attribute = (Attribute) xpath.selectSingleNode(node); + String href = attribute.getValue(); + removeMembers(href); + delete(href); + } + + } + } + + private void removeMembers(String href) throws ClientProtocolException, IOException, JDOMException { + + List cols = getAllMembers(href); + HttpResponse result = getEsciDocHandler().eScidocGet(href); + String ret= EScidocBasicHandler.convertStreamToString(result.getEntity().getContent()); + String ds = getEsciDocHandler().getDateStamp(ret); + + String delString=""; + for (String colid: cols){ + String[] colids = colid.split("/"); + + delString+=""+colids[colids.length-1]+""; + } + delString += ""; + System.out.println(delString); + InputStream body = new ByteArrayInputStream(delString.getBytes()); + result = getEsciDocHandler().eScidocPost(href+"/members/remove", body); + System.out.println("stat:"+result.getStatusLine()); + try { + System.out.println(EScidocBasicHandler.convertStreamToString(result.getEntity().getContent())); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private void delete(String href) throws ClientProtocolException, IOException { + System.out.println(href); + HttpResponse result = getEsciDocHandler().eScidocDelete(href); + System.out.println("stat:"+result.getStatusLine()); + try { + System.out.println(EScidocBasicHandler.convertStreamToString(result.getEntity().getContent())); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + public String createMPIWGCollection(String title, String creator,String description,String parentID) throws Exception{ + + // get a PID for the Collection + + String pid = getID(); + eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:"+pid,"MPIWGContainerTemplate.xml"); + + HashMap dcs = new HashMap(); // Store for the metadata + + //Verbinde dich mit der Collection Ÿber 
XML-rpc + + + dcs.put("description", description); + dcs.put("title", title); + dcs.put("creator", creator); + obj.insertDC(dcs); + //obj.setRelationship("info:fedora/echo:col1"); + String xml = obj.printXML(); + //System.out.println(xml); + String ret = ingest("/ir/container",xml); + String contid =EScidocBasicHandler.getId(ret); + + if (parentID!=null) + addToCollection(parentID,contid); + + + return contid; + + } + + public String createContentModell() throws ClientProtocolException, IOException{ + File cmf = new File("MPWGContentModelTemplate.xml"); + FileInputStream stream = new FileInputStream(cmf); + + HttpResponse ret = getEsciDocHandler().eScidocPost("/cmm/content-model", stream); + + String retString = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + return EScidocBasicHandler.getId(retString); + + + } + + public HttpResponse submitAnObject(String href,String comment) throws ClientProtocolException, IOException{ + HttpResponse ret = getEsciDocHandler().eScidocGet(href); + String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); + + String param = ""; + param+=""+comment+""; + param+=""; + + String command=href+"/submit"; + HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); + + return result; + + + } + public HttpResponse addVersionPid(String href) throws ClientProtocolException, IOException, JDOMException{ + HttpResponse ret = getEsciDocHandler().eScidocGet(href); + String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + + String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); + + + String pid=EScidocBasicHandler.getPID(retTxt); + String versionNumber = EScidocBasicHandler.getLastVersionNumber(retTxt); + + String vpid=pid+":"+versionNumber; + + String param = ""; + param+=""+vpid+""; + param+=""; + String 
command=href+"/assign-version-pid"; + + HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); + + return result; + + + } + + public HttpResponse releaseAnObject(String href,String comment) throws ClientProtocolException, IOException{ + HttpResponse ret = getEsciDocHandler().eScidocGet(href); + String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); + + String param = ""; + param+=""+comment+""; + param+=""; + + String command=href+"/release"; + HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); + + return result; + + + } + public HttpResponse withdrawAnObject(String href, String comment) throws ClientProtocolException, IOException{ + HttpResponse ret = getEsciDocHandler().eScidocGet(href); + String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); + + String param = ""; + param+=""+comment+""; + param+=""; + + String command=href+"/withdraw"; + HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); + + return result; + } + public HttpResponse reviseAnObject(String href, String comment) throws ClientProtocolException, IOException{ + HttpResponse ret = getEsciDocHandler().eScidocGet(href); + String retTxt = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + String dateStamp = EScidocBasicHandler.getDateStamp(retTxt); + + String param = ""; + param+=""+comment+""; + param+=""; + + String command=href+"/revise"; + HttpResponse result = getEsciDocHandler().eScidocPost(command, EScidocBasicHandler.convertStringToStream(param)); + + return result; + + } + public String resubmitAnObject(String href, String comment) throws ClientProtocolException, IOException, ServiceException { + 
/**
 * Filename filter that rejects every name starting with a dot (hidden
 * entries) and accepts everything else.
 */
public class MyFileNameFilter implements FilenameFilter {

    /**
     * @param dir  directory containing the entry (not inspected)
     * @param name name of the entry
     * @return {@code false} if {@code name} starts with '.', {@code true} otherwise
     */
    @Override
    public boolean accept(File dir, String name) {
        // startsWith is safe for the empty string, where the former
        // name.charAt(0) threw StringIndexOutOfBoundsException.
        return !name.startsWith(".");
    }

}
catch (MalformedURLException e) { + + e.printStackTrace(); + return; + } + try { + ret = handler.eScidocPut(args[0], url); + } catch (ClientProtocolException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return; + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return; + } + + System.out.println(ret.getStatusLine()); + String outTXT = EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + System.out.println(outTXT); + +// File nf = new File("/tmp/out.log"); +// FileWriter fw = new FileWriter(nf); +// fw.append(outTXT); +// fw.close(); + + + + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/Tools/ingestLib.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/Tools/ingestLib.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,212 @@ +package de.mpiwg.itgroup.eSciDoc.Tools; +import java.io.IOException; +import java.io.StringReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.rmi.RemoteException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.rpc.ServiceException; + +import org.apache.axis.types.NonNegativeInteger; +import org.apache.xmlrpc.XmlRpcException; +import org.apache.xmlrpc.client.XmlRpcClient; +import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; + + +public class ingestLib extends IngestECHO{ + + /** + * @param args + * @throws Exception + * @throws Exception + */ + + private static String ZOPEPROVIDER = "http://127.0.0.1:18080"; + private static String createFoxml(String litid) throws Exception{ + + String pid = + eSciDocXmlObject obj = new eSciDocXmlObject("mpiwg:"+pid,"vlpRessourceTemplate.xml"); + //obj.setTitle("lit11111 - title"); + + + 
//get DC Metadata from the Ressource + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + Object[] params = new Object[]{}; + config.setServerURL(new URL(ZOPEPROVIDER+"/metadataMain")); + client.setConfig(config); + params = new Object[]{"http://vlp.mpiwg-berlin.mpg.de/library/data/"+litid+"/index_meta"}; + String result = (String) client.execute("getDCFormatted", params); + System.out.println("dC:"+result); + + + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + DocumentBuilder db =factory.newDocumentBuilder(); + InputSource resultStream = new InputSource(new StringReader(result)); + Document dc = db.parse(resultStream); + obj.insertDC(dc); + + obj.setReferenceFolder("/mpiwg/online/permanent/vlp/"+litid); + //obj.generateXMLIndex(new File("/mpiwg/online/permanent/vlp/"+litid)); + //long date = new Date().getTime(); + SimpleDateFormat dateformat = new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss.S'Z'"); + String dateStr = dateformat.format(new Date()); + obj.addWebUrl("http://vlp.mpiwg-berlin.mpg.de/references?id="+litid, litid, dateStr); + obj.addIndexMetaUrl("http://vlp.mpiwg-berlin.mpg.de/library/data/"+litid+"/index_meta", litid, dateStr); + obj.setRelationship("info:fedora/vlp:col1"); + obj.setVLPId(litid); + return obj.printXML(); + + } + + + + + + + private static void ingestAllVLPObjects() throws MalformedURLException, + XmlRpcException { + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + config.setServerURL(new URL("http://127.0.0.1:18080/vlp/vlp_coll/library/data")); + client.setConfig(config); + Object[] params = new Object[]{}; + Object[] res = (Object[]) client.execute("getAllRessources", params); + + String string = ""; + for (Object re: res){ + Object[] rA = (Object [])re; + String[] splitted = ((String) rA[0]).split("/"); + String id = splitted[splitted.length-1]; + + try { + String 
xml = createFoxml(id); + + String ret = ingest(xml); + + config.setServerURL(new URL((String)rA[1])); + client.setConfig(config); + params = new Object[]{ret}; + client.execute("setPID", params); + + System.out.println(ret); + + + } catch (Exception e) { + System.err.println("cannot get:"+(String) rA[0]); + + } + } + } + + private static void modifyDCSet(String litid, String PID) throws XmlRpcException, ServiceException, IOException + { + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + XmlRpcClient client = new XmlRpcClient(); + config.setServerURL(new URL(ZOPEPROVIDER+"/metadataMain")); + + + client.setConfig(config); + Object[] params = new Object[]{"http://vlp.mpiwg-berlin.mpg.de/library/data/"+litid+"/index_meta"}; + String result = (String) client.execute("getDCFormatted", params); + //System.out.println("dC:"+result); + + FedoraAPIM APIM; + FedoraAPIA APIA; + + System.setProperty("javax.net.ssl.trustStore", "/usr/local/fedora/tomcat/conf/keystore"); + + + String baseURL = "https://127.0.0.1:8443/fedora"; + FedoraClient fc = new FedoraClient(baseURL, "fedoraAdmin", "XXXX"); + APIA=fc.getAPIA(); + APIM=fc.getAPIM(); + String content = "VLP Literature ObjectMPIWG"; + content += result; + content += ""; + //content=""; + + //System.out.println(content); + + + String chksum = MD5.asHex(new MD5(content).Final()); + + //System.out.println(chksum); + String[] em= new String[]{}; + String pid = APIM.modifyDatastreamByValue(PID, "DC",em , "Dublin Core Record for this object", "text/xml", "",content.getBytes("utf-8"), null, null,"metadata changed",false); + System.out.println(pid); + + } + + + public static void main(String[] args) throws Exception { + + //ingestAllVLPObjects(); + changeDCMetadata(); + //modifyDCSet("lit14191","mpiwg:PR9MPM4E"); + +} + + private static void changeDCMetadata() throws MalformedURLException, + ServiceException, IOException, RemoteException, XmlRpcException { + System.setProperty("javax.net.ssl.trustStore", 
"/usr/local/fedora/tomcat/conf/keystore"); + + String baseURL = "https://127.0.0.1:8443/fedora"; + FedoraClient fc = new FedoraClient(baseURL, "fedoraAdmin", "XXX"); + FedoraAPIA APIA; + APIA=fc.getAPIA(); + Condition[] condition = + {new Condition("pid", ComparisonOperator.has, "mpiwg:*")}; + FieldSearchQuery query = new FieldSearchQuery(condition, null); + + NonNegativeInteger maxResults = new NonNegativeInteger("" + 200); + + FieldSearchResult res = APIA.findObjects(new String[]{"pid"}, maxResults, query); + processResults(APIA, res); + while (true) + { + ListSession ses = res.getListSession(); + if (ses == null) + break; + String tok = ses.getToken(); + if (tok == null) + break; + + res = APIA.resumeFindObjects(res.getListSession().getToken()); + if (res.getResultList().length>0) + processResults(APIA, res); + else + break; + } + } + + private static void processResults(FedoraAPIA APIA, FieldSearchResult res) + throws XmlRpcException, ServiceException, IOException { + ObjectFields[] fields = res.getResultList(); + + System.out.println("found:"+fields.length); + for (ObjectFields field: fields){ + String pid = field.getPid(); + MIMETypedStream ds = APIA.getDatastreamDissemination(pid, "vlp-admin", null); + byte[] x = ds.getStream(); + String s = new String(x); + //System.err.println(s); + + Pattern p = Pattern.compile("(lit.*)"); + Matcher m = p.matcher(s); + m.find(); + String lit = m.group(1); + System.out.println(lit); + modifyDCSet(lit, pid); + } + } +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/config/echo.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/config/echo.properties Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,3 @@ +textServletUrl=http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn= +dirInfoUrl=http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/dirInfo-xml.jsp?fn= +metaDataManager=http://localhost:48080/MetaDataManagerRestlet \ No newline at end of file diff -r 
000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOCollection.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOCollection.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,21 @@ +package de.mpiwg.itgroup.eSciDoc.echoObjects; + +import java.io.IOException; +import java.util.Properties; + +public class ECHOCollection extends ECHOObject { + + + + public ECHOCollection(String name, String echoUrl) throws IOException{ + super(); + CMM ="/cmm/content-model/escidoc:11004"; + + this.name = name; + this.echoUrl = echoUrl; + + + + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOLink.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOLink.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,5 @@ +package de.mpiwg.itgroup.eSciDoc.echoObjects; + +public class ECHOLink { + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHOObject.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,95 @@ +package de.mpiwg.itgroup.eSciDoc.echoObjects; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Properties; + + +import org.apache.http.HttpResponse; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.log4j.Logger; +import org.apache.xmlrpc.XmlRpcException; +import org.apache.xmlrpc.client.XmlRpcClient; +import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.metadataManager.pid.PidGenerator; + +public class ECHOObject { + Logger logger = Logger.getRootLogger(); + public String CMM; // to be overwritten oder initialized 
with the CMM of this model. + public String name; + public String echoUrl; + public String pid; + public String metadataMananagerUrl; + public String context; + public String description =""; + + public ECHOObject() throws IOException{ + Properties echoProperties = new Properties(); + echoProperties.load(getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/config/echo.properties")); + + metadataMananagerUrl = (String) echoProperties.get("metaDataManager"); + + } + + public String getOrCreatePID() throws Exception { + Object[] parameters = new Object[] {}; + + String pid = null; + + //Verbinde mit ECHO + XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); + try { + config.setServerURL(new URL(echoUrl)); + } catch (MalformedURLException e1) { + logger.info("PID-Generator: cannot contact"+echoUrl); + pid = null; + } + XmlRpcClient client = new XmlRpcClient(); + client.setConfig(config); + // Hole pid aus ECHO + try { + pid = (String) client.execute("getPID", parameters); + } catch (Exception e) { + logger.info("PID-Generator: cannot contact"+echoUrl); + pid = null; + } + // Falls dort noch keine ist, erzeuge ein neue + if (pid == null) + { + pid = getPid(); + + } + else + logger.info("PID from ECHO:" + pid); + + this.pid=pid; +return pid; +} + + public String getPid() throws Exception, IOException { + String pid; + HttpClient hc = new DefaultHttpClient(); + HttpPost pm = new HttpPost(metadataMananagerUrl+"/PID"); + HttpResponse ret; + try { + ret = hc.execute(pm); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new Exception("cannot generate PID"); + } + pid=EScidocBasicHandler.convertStreamToString(ret.getEntity().getContent()); + if (ret.getStatusLine().getStatusCode()>300){ + //TODO define better Exceptions + throw new Exception("cannot generate PID"); + } + return pid; + } +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/echoObjects/ECHORessource.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,193 @@ +package de.mpiwg.itgroup.eSciDoc.echoObjects; + +import java.io.IOException; +import java.net.URI; +import java.util.Properties; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +import org.apache.http.Header; +import org.apache.http.HttpException; +import org.apache.http.HttpHost; +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.protocol.BasicHttpContext; +import org.apache.http.protocol.ExecutionContext; +import org.apache.http.protocol.HttpContext; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.metadataManager.pid.DCTransformer; + +public class ECHORessource extends ECHOObject{ + + + public String archivePath; + public String metaData =""; + public String fullText =""; + private String textServletUrl; + private String dirInfoUrl; + public String link; + + + public ECHORessource() throws IOException{ + super(); + CMM ="/cmm/content-model/escidoc:11003"; + Properties echoProperties = new Properties(); + echoProperties.load(getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/config/echo.properties")); + + textServletUrl = (String) echoProperties.get("textServletUrl"); + dirInfoUrl = (String) echoProperties.get("dirInfoUrl"); + + } + + + public ECHORessource(String name, String archivePath, String echoUrl) throws IOException { + this(); + CMM ="/cmm/content-model/escidoc:11003"; + this.name = name; + this.archivePath = archivePath; + this.echoUrl = echoUrl; + this.link = getLinkFromUrl(echoUrl); + + + } + + /** Holt die URL auf die das ECHO Objekt 
redirected + * @param echoUrl + * @return + */ + private String getLinkFromUrl(String echoUrl) { + if (echoUrl==null) + return null; + HttpContext localContext = new BasicHttpContext(); + HttpClient hc = new DefaultHttpClient(); + HttpGet get = new HttpGet(echoUrl); + HttpResponse response; + HttpUriRequest req; + HttpHost target; + + try { + response = hc.execute(get,localContext); + req = (HttpUriRequest) localContext.getAttribute( + ExecutionContext.HTTP_REQUEST); + target = (HttpHost) localContext.getAttribute( + ExecutionContext.HTTP_TARGET_HOST); + + + } catch (ClientProtocolException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return ""; + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return ""; + } + URI ret = req.getURI(); + + return target.toString()+ret.toString(); + } + + + public String toString(){ + String format="NAME: %s; archivePAth: %s; metaData: %s; fullText %s; url %s"; + + return String.format(format, name,archivePath,metaData,fullText,echoUrl); + + } + + + /** + * Baue Metadatalink um, dass er auf das Servlet zeigt. 
+ * @param ml + * @return + */ + public String correctML(String ml) { + Pattern p = Pattern.compile("experimental/(.*)"); + Matcher m = p.matcher(ml); + String pf; + if (m.find()) + pf = "experimental/" + m.group(1); + else { + p = Pattern.compile("permanent/(.*)"); + m = p.matcher(ml); + if (m.find()) + pf = "permanent/" + m.group(1); + else + return ml; + } + return textServletUrl + pf; + } + + static public String correct(String ml){ + Pattern p = Pattern.compile("experimental/(.*)"); + Matcher m = p.matcher(ml); + String pf; + if (m.find()) + pf = "experimental/" + m.group(1); + else { + p = Pattern.compile("permanent/(.*)"); + m = p.matcher(ml); + if (m.find()) + pf = "permanent/" + m.group(1); + else + return ml; + } + return pf; + } + + + public String getImageFolderPath() { + DCTransformer trans = new DCTransformer(metaData); + String path = trans.getImagePathFromIndexMeta(); + if (path==null || path.equals("")){ + path=archivePath+"/pageimg"; + } else { + path=archivePath+"/"+path; + } + + String testPath=correct(path); // get rid of everything before eperimental or permanent + if(testPath(testPath)) + return path; + + return null; + } + + + private boolean testPath(String path) { + HttpClient client = new DefaultHttpClient(); + HttpGet get; + try { + get = new HttpGet(dirInfoUrl+path); + } catch (RuntimeException e){ + e.printStackTrace(); + return false; + } + + try { + HttpResponse response = client.execute(get); + String body = EScidocBasicHandler.convertStreamToString(response.getEntity().getContent());; + + Pattern p = Pattern.compile("(.*)",Pattern.DOTALL); + Matcher m = p.matcher(body); + if (m.find()){ // dir body leer + if (m.group(1).equals("")) + return false; + else + return true; + } + + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return false; + } +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/foxridge/FoxridgeRessource.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 
+++ b/src/de/mpiwg/itgroup/eSciDoc/foxridge/FoxridgeRessource.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,25 @@ +package de.mpiwg.itgroup.eSciDoc.foxridge; + +import java.io.IOException; + +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; + + + +public class FoxridgeRessource extends ECHORessource { + + + + public FoxridgeRessource() throws IOException { + super(); + CMM ="/cmm/content-model/escidoc:13001"; + + // TODO Auto-generated constructor stub + } + + public FoxridgeRessource(String name, String absolutePath, String string) throws IOException { + super(name, absolutePath, string); + CMM ="/cmm/content-model/escidoc:13001"; + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaIterator.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,125 @@ +package de.mpiwg.itgroup.eSciDoc.foxridge; + + +/* + * Copyright 2000-2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Enumeration; +import java.util.Iterator; +import java.util.Stack; +import java.util.Vector; + + + +import org.jdom.Document; +import org.jdom.JDOMException; +import org.jdom.input.SAXBuilder; + +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; + +/** + * An iterator which iterates through the contents of a java directory. The + * iterator should be created with the directory at the root of the Java + * namespace. + * + */ +public class IndexMetaIterator implements Iterator { + + private File rootFolder; + private File currentFolder; + private Stack stack; + + + public IndexMetaIterator(File rootFolder){ + this.rootFolder=rootFolder; + this.currentFolder=rootFolder; + this.stack = new Stack(); + for (File f:rootFolder.listFiles()){ + stack.push(f); + } + } + @Override + public boolean hasNext() { + // TODO Auto-generated method stub + return !stack.isEmpty(); + } + + @Override + public ECHOObject next() { + // TODO Auto-generated method stub + File nextFile = stack.pop(); + while(!nextFile.getName().endsWith(".meta") && !stack.isEmpty()){ + System.out.println("CHECK_________"+nextFile.getName()); + if(!nextFile.getName().equals("pageimg")){ //skip pageimg + if(nextFile.isDirectory()){ + for (File f:nextFile.listFiles()){ + stack.push(f); + } + } + } + nextFile = stack.pop(); + } + if (!nextFile.getName().endsWith(".meta")) //der letzte Eintrag muss noch gretrennt getestet werden. 
+ nextFile = null; + System.out.println("FOUND:"+nextFile); + try { + return createECHOObject(nextFile); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return null; + } + + private ECHOObject createECHOObject(File nextFile) throws Exception { + + //Document doc = new SAXBuilder().build(nextFile); + try{ + FoxridgeRessource er = new FoxridgeRessource(nextFile.getParentFile().getName(),nextFile.getParentFile().getAbsolutePath(),null); + + er.metaData = er.correctML(nextFile.getAbsolutePath()); + er.pid=er.getPid(); + er.echoUrl=er.metaData; //TODO find a better solution, what to present here, z.b. texttool-tag auswerten. + return er; + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + throw new Exception(); + } + } + @Override + public void remove() { + // TODO Auto-generated method stub + + } + + + +} + + + diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaWalker.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/foxridge/IndexMetaWalker.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,20 @@ +package de.mpiwg.itgroup.eSciDoc.foxridge; + +import java.io.File; +import java.util.Iterator; + +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; + +public class IndexMetaWalker implements Iterable { + + private File rootFolder; + + public IndexMetaWalker(File rootFolder){ + this.rootFolder = rootFolder; + } + public Iterator iterator() { + + return new IndexMetaIterator(rootFolder); + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/harvesting/ECHO_ESCIDOC_init.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ECHO_ESCIDOC_init.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,94 @@ 
+package de.mpiwg.itgroup.eSciDoc.harvesting; + +import java.io.IOException; +import java.io.InputStream; + + + +import org.apache.http.HttpResponse; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Attribute; +import org.jdom.Document; +import org.jdom.JDOMException; +import org.jdom.input.SAXBuilder; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class ECHO_ESCIDOC_init { + + private EScidocBasicHandler connector; + + public ECHO_ESCIDOC_init(EScidocBasicHandler connector) { + this.connector = connector; + } + + public String createResourceCM() throws IOException, IllegalStateException, JDOMException{ + InputStream str = getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOResource_content-model.xml"); + HttpResponse result = connector.eScidocPut("/cmm/content-model", str); + + Document doc = new SAXBuilder().build(result.getEntity().getContent()); + + + XPath xp = EScidocTools.getESciDocXpath("//escidocContentModel:content-model/@xlink:href"); + Attribute href = (Attribute) xp.selectSingleNode(doc); + + + return href.getValue(); + } + + public String createCollectionCM() throws IOException, IllegalStateException, JDOMException{ + InputStream str = getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollection_content-model.xml"); + HttpResponse result = connector.eScidocPut("/cmm/content-model", str); + Document doc = new SAXBuilder().build(result.getEntity().getContent()); + + + XPath xp = EScidocTools.getESciDocXpath("//escidocContentModel:content-model/@xlink:href"); + Attribute href = (Attribute) xp.selectSingleNode(doc); + + + return href.getValue(); + } + + public String createFoxridgeResourceCM() throws IOException, IllegalStateException, JDOMException{ + InputStream str = 
getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/FoxridgeResource_content-model.xml"); + HttpResponse result = connector.eScidocPut("/cmm/content-model", str); + Document doc = new SAXBuilder().build(result.getEntity().getContent()); + + + XPath xp = EScidocTools.getESciDocXpath("//escidocContentModel:content-model/@xlink:href"); + Attribute href = (Attribute) xp.selectSingleNode(doc); + + + return href.getValue(); + } + + + /** + * @param args + * @throws JDOMException + * @throws IOException + * @throws IllegalStateException + */ + public static void main(String[] args) throws IllegalStateException, IOException, JDOMException { + + + Logger rl = Logger.getRootLogger(); + BasicConfigurator.configure(); + rl.setLevel(Level.DEBUG); + + + EScidocBasicHandler connector = new EScidocBasicHandler("192.168.56.2",8080,"dwinter","weikiki7"); + + ECHO_ESCIDOC_init ini = new ECHO_ESCIDOC_init(connector); + //System.out.println(ini.createCollectionCM()); + System.out.println(ini.createFoxridgeResourceCM()); + + + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/ESciDocDataHarvester.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,168 @@ +package de.mpiwg.itgroup.eSciDoc.harvesting; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; + + +import org.apache.http.HttpResponse; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.xml.DOMConfigurator; +import org.jdom.JDOMException; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; 
+import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; +import de.mpiwg.itgroup.eSciDoc.importer.Importer; +import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; +import de.mpiwg.itgroup.eSciDoc.transformer.Transformer; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class ESciDocDataHarvester { + + protected Logger logger = Logger.getRootLogger(); + protected Importer importer; + protected EScidocBasicHandler connector; + protected Transformer transformer; + private EScidocTools tools; + private String echoContext; + private Logger addedFile = Logger.getLogger("addedFilesLogger"); + private Logger notAddedFile = Logger.getLogger("notAddedFilesLogger"); + + + public ESciDocDataHarvester(Importer importer, Transformer transformer, EScidocBasicHandler connector, String context) throws IOException{ + this.importer=importer; + this.transformer=transformer; + this.connector=connector; + this.tools=new EScidocTools(connector); + this.echoContext= context; + + + } + public Boolean readObjectsFromInstance(String type) throws Exception{ + ArrayList addedObjects = new ArrayList(); + ArrayList notAddedObjects = new ArrayList(); + for (ECHOObject obj: importer.getObjectList(type)){ + + + if (ECHORessource.class.isInstance(obj)){ + try { + if (connector.alreadyExists("/md-records/md-record/admin/archivePath",((ECHORessource)obj).archivePath,echoContext)){ + logger.debug("already exist:"+((ECHORessource)obj).archivePath); + continue; + } + } catch (Exception e) { + logger.debug("already exist error"); + e.printStackTrace(); + continue; + } + } + + obj.context=echoContext; + + String contid=connector.getIDfromPID(obj.pid,echoContext); + if (contid!=null){ + System.out.println("------- belongsTo:"+contid); + } else { + + eSciDocXmlObject escidocItem = transformer.transform(obj); + logger.info(escidocItem.printXML()); + // TODO write PID to back to echo-obj + Boolean result = connector.createItem(escidocItem); + if (result){ + 
addedObjects.add(escidocItem.getESciDocId()); + addedFile.debug(escidocItem.getESciDocId()+"\n"); + //addedFile.write(escidocItem.getESciDocId()+"\n"); + //addedFile.flush(); + + }else { + notAddedObjects.add(obj.echoUrl); + notAddedFile.debug(obj.echoUrl); + //notAddedFile.write(obj.echoUrl+"\n"); + //notAddedFile.flush(); + } + //if (result == ESciDocConnector.WRITE_RESULT_PID_EXISTS){ + // logger.info("PID already exists:"+obj); + //} else if (result == ESciDocConnector.WRITE_RESULT_OBJ_WITH_SAME_REFERENCE){ + // logger.info("Object with reference to the same digital object already exists:"+obj); + //} + + } + } + if(logger.getLevel()==Level.DEBUG){ + for (String addedObject:addedObjects){ + logger.debug(addedObject); + } + } + +// File outFile = new File("/tmp/import.out"); +// FileWriter fw = new FileWriter(outFile); +// for (String addedObject:addedObjects){ +// fw.write(addedObject+"\n"); +// } +// for (String addedObject:notAddedObjects){ +// fw.write(addedObject+"\n"); +// } +// fw.close(); + return true; + } + + public void releaseAndSubmitObjects(String command,String objectXPath) throws Exception{ + for (eSciDocXmlObject obj: connector.getObjectListFromFilterResult(command,objectXPath)){ + HttpResponse res = connector.submitAnObject(obj,"first release"); + logger.debug(res.getStatusLine()); + if (res.getStatusLine().getStatusCode()!=200){ + logger.debug("Can not submit:"+obj.getESciDocId()); + //res.getEntity().consumeContent(); // necessary to release the conneciton + + } + res.getEntity().consumeContent(); // necessary to release the conneciton + + if (!connector.upDateObject(obj)){ + logger.debug("Can not update:"+obj.getESciDocId()); + //continue; + + } + + + res = connector.releaseAnObject(obj, "first release"); + logger.debug(res.getStatusLine()); + if (res.getStatusLine().getStatusCode()!=200){ + logger.debug("Can not release:"+obj.getESciDocId()); + res.getEntity().consumeContent(); // necessary to release the conneciton + continue; + } + 
addedFile.debug("RELEASED:"+obj.getESciDocId()); + res.getEntity().consumeContent(); // necessary to release the conneciton + } + + } + public static void main(String[] args) throws Exception{ + + Logger rl = Logger.getRootLogger(); + DOMConfigurator.configure("log4uconf.xml"); + rl.setLevel(Level.DEBUG); + + + EScidocBasicHandler connector = new EScidocBasicHandler("192.168.56.2",8080,"dwinter","weikiki7"); + ECHOImporter newimporter = new ECHOImporter(new URL("file:///Users/dwinter/libcoll.rdf")); + ESciDocDataHarvester hv = new ESciDocDataHarvester(newimporter, + new ECHOTransformer(),connector,"/ir/context/escidoc:40001"); + + //hv.readObjectsFromInstance("ECHO_collection"); + //hv.readObjectsFromInstance("ECHO_resource"); + + hv.releaseAndSubmitObjects("/ir/context/escidoc:40001/resources/members","//escidocItem:item"); + + +// newimporter.organizeRessourcesInCollections(connector, "/ir/context/escidoc:1001"); + //hv.releaseAndSubmitObjects("/ir/containers","//container:container"); + } +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/harvesting/FoxridgeHarverster.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,51 @@ +package de.mpiwg.itgroup.eSciDoc.harvesting; + +import java.io.File; +import java.net.MalformedURLException; +import java.net.URL; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.xml.DOMConfigurator; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.foxridge.IndexMetaIterator; +import de.mpiwg.itgroup.eSciDoc.importer.ECHOImporter; +import de.mpiwg.itgroup.eSciDoc.importer.FoxridgeImporter; +import de.mpiwg.itgroup.eSciDoc.transformer.ECHOTransformer; + +public class FoxridgeHarverster { + + /** + * @param args + * @throws Exception + */ + public static void main(String[] args) throws 
Exception { + + DOMConfigurator.configure("log4uconf.xml"); + Logger rl = Logger.getRootLogger(); + Logger.getLogger("transformerLogger").setLevel(Level.DEBUG); + Logger.getLogger("addedFilesLogger").setLevel(Level.DEBUG); + Logger.getLogger("notAddedFilesLogger").setLevel(Level.DEBUG); + + rl.setLevel(Level.DEBUG); + + //IndexMetaIterator sd = new IndexMetaIterator(new File("/Volumes/online_permanent/echo")); + //while (sd.hasNext()){ + // System.out.println(sd.next()); + //} + EScidocBasicHandler connector = new EScidocBasicHandler("escidoc-test.mpiwg-berlin.mpg.de",8080,"dwinter","weikiki7"); + + ESciDocDataHarvester hv = new ESciDocDataHarvester(new FoxridgeImporter(new File("/Volumes/online_permanent/")), + new ECHOTransformer(),connector,"/ir/context/escidoc:12001"); + + //hv.readObjectsFromInstance("ECHO_collection"); + hv.readObjectsFromInstance("ECHO_resource"); + + //hv.releaseAndSubmitObjects("/ir/context/escidoc:12001"); + + + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/importer/ECHOImporter.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/importer/ECHOImporter.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,638 @@ +package de.mpiwg.itgroup.eSciDoc.importer; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.StringReader; +import java.lang.reflect.Array; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.jdom.Attribute; 
+import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.Namespace; +import org.jdom.input.SAXBuilder; +import org.jdom.output.XMLOutputter; +import org.jdom.xpath.XPath; +import org.w3c.dom.Entity; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.Tools.Html2Text; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOCollection; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public class ECHOImporter implements Importer { + + private Logger logger = Logger.getRootLogger(); + private static long MAX_RES = 1000000L; // for debugging + private URL instanceUrl; + private String collectionCMM = "/cmm/content-model/escidoc:11004"; + + public ECHOImporter(URL url) { + this.instanceUrl = url; + } + + @Override + public Iterable getObjectList(String type) { + + try { + return getObjectListfromRDF(type); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return new ArrayList(); + } + + private ArrayList getObjectListfromRDF(String type) + throws JDOMException, IOException { + + ArrayList ret = new ArrayList(); + + SAXBuilder builder = new SAXBuilder(); + + Document doc = builder.build(instanceUrl); + + Element el = doc.getRootElement(); + + // get resources + XPath xpathResources = XPath + .newInstance("//rdf:Description[echonavigation:type='" + type + + "']"); + xpathResources.addNamespace("MPIWG", + "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"); + xpathResources.addNamespace("rdf", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + xpathResources + .addNamespace("echonavigation", "http://www.echo.eu/rdf#"); + + List paths = xpathResources.selectNodes(el); + + int counter = 0; + 
for (Element path : paths) { + counter++; + logger.debug("resource counter:" + String.valueOf(counter)); + if (logger.getLevel().equals(Level.DEBUG) && (counter > MAX_RES)) + break; + + ECHOObject obj = getECHORessourceFromRDF(el, path, type); + // fueger dem object seine PID hinzu. + String pid; + try { + pid = obj.getOrCreatePID(); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + pid = null; + } + if (pid == null) { + logger.error("Cannot createOrGetAn a PID for:" + obj.toString()); + logger.error("Object will not be added"); + } else { + ret.add(obj); + } + } + + return ret; + } + + private ECHOObject getECHORessourceFromRDF(Element el, Element path, + String echotype) throws JDOMException, IOException { + + XPath xpath = EScidocTools.getESciDocXpath("./@rdf:about"); + Attribute aboutAttr = (Attribute) xpath.selectSingleNode(path); + String aboutString = aboutAttr.getValue(); + // hole das object + + xpath = EScidocTools.getESciDocXpath(".//echonavigation:name"); + String name = ((Element) xpath.selectSingleNode(path)).getTextTrim(); + + Html2Text htmlParser = new Html2Text(); // filter html codes + htmlParser.parse(new StringReader(name)); + name = htmlParser.getText(); + + xpath = EScidocTools.getESciDocXpath(".//mpiwg:archive-path"); + Element archiveElement = (Element) xpath.selectSingleNode(path); + String archivePath = ""; + if (archiveElement != null) + archivePath = archiveElement.getTextTrim(); + + xpath = EScidocTools.getESciDocXpath("@rdf:about"); + String about = ((Attribute) xpath.selectSingleNode(path)).getValue(); + + // hole seq des objectes + String sequenceString = ("//rdf:Seq[@rdf:about='" + about + "']/rdf:li/@rdf:resource"); + xpath = EScidocTools.getESciDocXpath(sequenceString); + + List seqs = xpath.selectNodes(el); + + ECHOObject er = null; + if (echotype.equals("ECHO_resource")) { + er = new ECHORessource(name, archivePath, aboutString); + } else if (echotype.equals("ECHO_collection")) { + er = new 
ECHOCollection(name, aboutString); + } + + // set description + DefaultHttpClient hc = new DefaultHttpClient(); + URI echoUri; + try { + echoUri = new URI(er.echoUrl + "/getDescription"); + + HttpGet hg = new HttpGet(echoUri); + + HttpResponse resp = hc.execute(hg); + HttpEntity respEnt = resp.getEntity(); + if (respEnt != null) { + // er.description=EScidocBasicHandler.convertStreamToString(respEnt.getContent()); + // filter html codes + htmlParser.parse(new InputStreamReader(respEnt.getContent())); + er.description = htmlParser.getText(); + + } + } catch (Exception e1) { + logger.debug("echoImporter no URI:" + er.echoUrl); + // e1.printStackTrace(); + } + + for (Attribute seq : seqs) { + String typeString = ("//rdf:Description[@rdf:about='" + + seq.getValue() + "']/echonavigation:type"); + xpath = EScidocTools.getESciDocXpath(typeString); + + Element typeNode = (Element) xpath.selectSingleNode(el); + if (typeNode==null){ + logger.debug("getRessourceFromRDF, no type in:"+typeString); + continue; + } + String type = (typeNode).getTextTrim(); + if (ECHORessource.class.isInstance(er) + && type.equals("ECHO_metaData")) { + + HttpClient client = new DefaultHttpClient(); + HttpGet get = new HttpGet(seq.getValue().replace( + "showMetaDataXML", "getMetaDataLink")); + HttpResponse ret = null; + try { + ret = client.execute(get); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + int code = ret.getStatusLine().getStatusCode(); + try { + if ((code == 204) || (code >= 300)) + ((ECHORessource) er).metaData = ""; + else { + String str = EScidocBasicHandler + .convertStreamToString(ret.getEntity() + .getContent()); + ((ECHORessource) er).metaData = ((ECHORessource) er) + .correctML(str); + } + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } else if (ECHORessource.class.isInstance(er) + && type.equals("ECHO_fulltext")) { + + HttpClient client = new DefaultHttpClient(); + HttpGet get = 
new HttpGet(seq.getValue() + "?noredirect=yes"); + HttpResponse ret = null; + try { + ret = client.execute(get); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + try { + String str = EScidocBasicHandler.convertStreamToString(ret + .getEntity().getContent()); + ((ECHORessource) er).fullText = new String(str); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + } + + logger.debug(er.toString()); + return er; + } + + /** + * Erzeugt Collections auf der Basis der in den Metadaten gespeicherten ECHO + * urls. Dabei wir der Pfad schrittweise analysiert und dann ein Baum + * aufgebaut. + * + * @param handler + * eScidoc Serververbindung + * @param context + * Context der Kollektion (sollte eine Kollektio sein die aus + * einer ECHO webseite aufgebaut wurde. + * @throws Exception + */ + public void organizeRessourcesInCollections(EScidocBasicHandler handler, + String context) throws Exception { + HashMap> tree = new HashMap>(); // nimmt + // den + // tree + // der + // items + // auf + HashMap url2escidocId = new HashMap(); + + HashMap containerUrl2escidocId = new HashMap(); + HashMap> containerTree = new HashMap>(); // nimmt + // den + // tree + // der + // container + // auf + + ArrayList urls = handler.getAllLinksOfContext("web_page", + context); + generateTreeAndConversion(urls, tree, url2escidocId); + + File tt = new File("/tmp/list.out"); + FileWriter fw= new FileWriter(tt); + for (String containerUrl : tree.keySet()) { + fw.write(containerUrl); + } + fw.close(); + + // erzeuge jetzt die container + for (String containerUrl : tree.keySet()) { + XPath xp; + + // erzeuge Document des Container mit dem entsprechenden Kontext und den Metadaten aus dem Context. 
+ Document doc = createContainer(handler, context, url2escidocId, + containerUrl); + + if (doc==null){ + doc= createContainerFromECHO(handler, containerUrl, context); + } + + // now fill the container + + xp = EScidocTools.getESciDocXpath("//struct-map:struct-map"); + Element structmap = (Element) xp.selectSingleNode(doc); + + // fuege die Collection selbst in den container + putContentInStructMap(structmap, url2escidocId.get(containerUrl)); + + + //fuege nun nur die ressourcen hinzu + for (String content : tree.get(containerUrl)) { + if (!contentIsCollection(handler,content)) + putContentInStructMap(structmap, content); + } + logger.debug(printXML(doc)); + + try { + + // rrzeuge das object jetzt in escidoc + String result = handler.createObject("/ir/container", + printXML(doc)); + xp = EScidocTools + .getESciDocXpath("//container:container/@xlink:href"); + Document containerDoc = new SAXBuilder().build(EScidocBasicHandler + .convertStringToStream(result)); + Attribute containerHref = (Attribute) xp.selectSingleNode(containerDoc); + logger.debug("added container:" + containerHref); + Logger.getLogger("addedFilesLogger").debug( + "added container:" + containerHref); + + // sichere jetzt den neuen container im container tree + String[] splitted = containerUrl.split("/"); // teile dazu die + // container url + // wieder auf. 
+ StringBuffer buffer = new StringBuffer(); + for (int i = 0; i < splitted.length - 2; i++) { + buffer.append(splitted[i]); + buffer.append("/"); + + } + buffer.append(splitted[splitted.length - 2]); + + String parentContainer = buffer.toString(); + + if (!containerTree.containsKey(parentContainer)) { + containerTree.put(parentContainer, new ArrayList()); + } + containerTree.get(parentContainer).add(containerHref.getValue()); + + containerUrl2escidocId.put(containerUrl, containerHref.getValue()); + + } catch (Exception e) { + Logger.getLogger("notAddedFilesLogger").debug( + "notadded container:" + containerUrl); + logger.debug("notadded container:" + containerUrl); + } + + } + addContainer(handler, containerTree, containerUrl2escidocId, context); // add + // the + // container + // to + // the + // struct + // maps + // of + // the + // parents + } + + /** Teste ob sich hinter content eine ressource oder eine collection versteckt + * @param content, (escidocid,echourl) des content + * @return + * @throws IOException + * @throws JDOMException + */ + private boolean contentIsCollection(EScidocBasicHandler handler, String content) throws IOException, JDOMException { + String url = content.split(",")[0]; + HttpResponse result = handler.eScidocGet(url); + InputStream xml = result.getEntity().getContent(); + String cmm = EScidocBasicHandler.getContentModel(xml); + + return cmm.equals(collectionCMM); + } + + /** FŸge einen content in die struct-map + * @param structmap + * @param content, (escidocID,url) der Ressource + */ + public void putContentInStructMap(Element structmap, String content) { + if (content==null) // existiert nicht + return; + + String[] urlSplit = content.split(","); // urls von get all + // links haben immer die + // form escidoc:1,url + String newItemUrl = urlSplit[0]; + + Element newItem = new Element("item", EScidocTools.srel); + + Namespace ns = Namespace.getNamespace("xlink", + EScidocTools.xlink); + newItem.setAttribute("href", newItemUrl, ns); 
+ structmap.addContent(newItem); + } + + /** Erzeuge eine Container + * @param handler Context des Containers + * @param url2escidocId Liste mit url -> escidocId Zurordnungen + * @param collectionURL, echo url der collection zu der der Container erzeugt werden soll + * @return + * @throws JDOMException + * @throws IOException + * @throws ClientProtocolException + */ + public Document createContainer(EScidocBasicHandler handler, + String context, HashMap url2escidocId, + String collectionURL) throws JDOMException, IOException, + ClientProtocolException { + + InputStream is = getClass() + .getResourceAsStream( + "/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollection_container.xml"); + Document doc = new SAXBuilder().build(is); + + XPath xp = EScidocTools + .getESciDocXpath("//srel:context/@xlink:href"); + Attribute href = (Attribute) xp.selectSingleNode(doc); + href.setValue(context); + + xp = EScidocTools + .getESciDocXpath("//srel:content-model/@xlink:href"); + href = (Attribute) xp.selectSingleNode(doc); + href.setValue(collectionCMM); // TODO mache das + // konfigurierbar, + // nimm z.z. + // echocollection + // modell + String cmd = url2escidocId.get(collectionURL); // ensprechende collection existiert nicht. 
+ if (cmd==null){ + return null; + } + InputStream in = handler + .eScidocGet(cmd).getEntity() + .getContent(); + Document ecDoc = new SAXBuilder().build(in); + + + // copy description from collection to container + xp = EScidocTools.getESciDocXpath("/escidocItem:item//metadata-records:md-record[@name='escidoc']//dc:title"); + + + Element item = (Element) xp.selectSingleNode(ecDoc); + String title = "anon"; + if (item != null) + title = item.getTextTrim(); + + xp = EScidocTools.getESciDocXpath("/container:container//metadata-records:md-record[@name='escidoc']//dc:title"); + item = (Element) xp.selectSingleNode(doc); + + item.setText(title); + + xp = EScidocTools.getESciDocXpath("/escidocItem:item//metadata-records:md-record[@name='escidoc']//dc:description"); + item = (Element) xp.selectSingleNode(ecDoc); + String description; + if (item != null) { + description = item.getTextTrim(); + xp = EScidocTools.getESciDocXpath("/container:container//metadata-records:md-record[@name='escidoc']//dc:description"); + item = (Element) xp.selectSingleNode(doc); + item.setText(description); + } else { + // get description from ECHO + XPath url = EScidocTools + .getESciDocXpath(".//escidocComponents:component[escidocComponents:properties/prop:content-category[text()='web_page']]/escidocComponents:content/@xlink:href"); + Attribute hrefECHO = (Attribute) url.selectSingleNode(ecDoc); + if (hrefECHO != null) { + DefaultHttpClient hc = new DefaultHttpClient(); + HttpGet hg = new HttpGet(hrefECHO.getValue() + + "/getDescription"); + HttpResponse resp = hc.execute(hg); + HttpEntity respEnt = resp.getEntity(); + if (respEnt != null) { + item = (Element) xp.selectSingleNode(doc); + item.setText(EScidocBasicHandler + .convertStreamToString(respEnt.getContent())); + } + + } + } + return doc; + } + + /** + * Erzeugt aus einer Liste von urls der Form escidocID,url einen hierarchischen Tree, sowie jeweils eine Zuordnung der url zur escidocid + * @param urls, liste der urls der Form 
"escidocID,url" + * @param tree, hier wird der Tree rein geschrieben, sollte ein leerer HashMap sein + * @param url2escidocId, hier wird die Zuordnung, url -> escidocID abgespeichert + */ + public void generateTreeAndConversion(ArrayList urls, + HashMap> tree, + HashMap url2escidocId) { + for (String url : urls) { + // teile die url auf die url ohne den letzten teil ist die url der + // collection + String[] splitted = url.split("/"); + url2escidocId.put(url.split(",")[1], + url.split(",")[0]); + + + if (splitted.length > 1) // pfad is lang genug + { + StringBuffer buffer = new StringBuffer(); + for (int i = 0; i < splitted.length - 2; i++) { + buffer.append(splitted[i]); + buffer.append("/"); + + } + buffer.append(splitted[splitted.length - 2]); + + String collection = buffer.toString(); + String collectionUrl = collection.split(",")[1]; // nur die url + // nicht den + // escidoc-anteil. + + if (!tree.containsKey(collectionUrl)) { + tree.put(collectionUrl, new ArrayList()); + } + tree.get(collectionUrl).add(url); + + } + } + } + + private void addContainer(EScidocBasicHandler handler, + HashMap> containerTree, + HashMap containerUrl2escidocId, String context) + throws Exception { + for (String containerUrl : containerTree.keySet()) { + String escidocId = containerUrl2escidocId.get(containerUrl); +// if (escidocId == null) { +// +// // TODO: some containers have no ECHOcollection or +// // ECHO_ressourceif this is the case create it here +// escidocId = createContainerFromECHO(handler, containerUrl, +// context); +// logger.debug("container not in containerUrl2escidoc:" +// + containerUrl); +// } + if (escidocId==null){ + logger.debug("addContainer problem not in containerUrl2escidocId:"+containerUrl); + Document doc = createContainerFromECHO(handler, containerUrl, context); + String res = handler.createObject("/ir/container",printXML(doc)); + escidocId = "/ir/container/"+EScidocBasicHandler.getId(res); + //return "/ir/container/"+EScidocBasicHandler.getId(res); + 
+ } + HttpResponse result = handler.eScidocGet(escidocId); + String obj = EScidocBasicHandler.convertStreamToString(result + .getEntity().getContent()); + String datestamp = EScidocBasicHandler.getDateStamp(obj); + String body = String.format( + "", datestamp); + + // fuege jetzt die id aller sub container ein + for (String content : containerTree.get(containerUrl)) { + String[] tmp = content.split("/"); + String addID = tmp[tmp.length - 1]; + body += String.format("%s", addID); + } + body += ""; + result = handler.eScidocPost(escidocId + "/members/add", + EScidocBasicHandler.convertStringToStream(body)); + String retText = EScidocBasicHandler.convertStreamToString(result + .getEntity().getContent()); + logger.debug("adding result:" + retText); + } + + } + + /** + * Erzeuge einen container aus echo daten + * @param handler + * @param url + * @param context + * @return + * @throws Exception + */ + private Document createContainerFromECHO(EScidocBasicHandler handler, + String url, String context) throws Exception { + InputStream is = getClass() + .getResourceAsStream( + "/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollection_container.xml"); + Document doc = new SAXBuilder().build(is); + + XPath xp = EScidocTools.getESciDocXpath("//srel:context/@xlink:href"); + Attribute href = (Attribute) xp.selectSingleNode(doc); + href.setValue(context); + + xp = EScidocTools.getESciDocXpath("//srel:content-model/@xlink:href"); + href = (Attribute) xp.selectSingleNode(doc); + href.setValue(collectionCMM); // TODO mache das + // konfigurierbar, + // nimm z.z. 
+ // echocollection + // modell + + + xp = EScidocTools.getESciDocXpath("//dc:title"); + + + String title = url; + + + Element item = (Element) xp.selectSingleNode(doc); + + item.setText(title); + + //String res = handler.createObject("/ir/container", + // printXML(doc)); + + //return "/ir/container/"+EScidocBasicHandler.getId(res); + + return doc; + } + + private String printXML(Document doc) { + XMLOutputter out = new XMLOutputter(); + + String string = out.outputString(doc); + return string; + + } +} \ No newline at end of file diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/importer/FoxridgeImporter.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/importer/FoxridgeImporter.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,20 @@ +package de.mpiwg.itgroup.eSciDoc.importer; + +import java.io.File; + +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; +import de.mpiwg.itgroup.eSciDoc.foxridge.IndexMetaWalker; + +public class FoxridgeImporter implements Importer { + + private File rootFolder; + public FoxridgeImporter(File rootFolder){ + this.rootFolder=rootFolder; + } + @Override + public Iterable getObjectList(String type) { + + return new IndexMetaWalker(rootFolder); + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/importer/Importer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/importer/Importer.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,10 @@ +package de.mpiwg.itgroup.eSciDoc.importer; + + +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; +public interface Importer { + + public Iterable getObjectList(String type); + + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/transformer/ECHOTransformer.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,118 @@ +package de.mpiwg.itgroup.eSciDoc.transformer; + +import 
java.io.InputStream; +import java.util.HashMap; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.log4j.Logger; +import org.jdom.Attribute; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.input.SAXBuilder; +import org.jdom.xpath.XPath; + +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocBasicHandler; +import de.mpiwg.itgroup.eSciDoc.Tools.EScidocTools; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOCollection; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHORessource; +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; +import de.mpiwg.itgroup.metadataManager.pid.DCTransformer; + +public class ECHOTransformer implements Transformer { + + private String ECHORESOURCE_TEMPLATE_XML = "/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOResourceTemplate.xml"; + private String ECHOCOLLECTION_TEMPLATE_XML = "/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollectionTemplate.xml"; + + private Logger logger = Logger.getLogger("transformerLogger"); + // + // + public ECHOTransformer(){ + + } + + public eSciDocXmlObject transform(ECHOObject obj) throws Exception { + + if (obj.pid==null) + return null; + + + + eSciDocXmlObject eSciDocObj = null; + if (ECHORessource.class.isInstance(obj)){ + + eSciDocObj = new eSciDocXmlObject(obj.pid, + getClass().getResourceAsStream(ECHORESOURCE_TEMPLATE_XML)); + + eSciDocObj.setContext(obj.context); + + eSciDocObj.addWebUrl(obj.echoUrl); + + eSciDocObj.setContentModel(obj.CMM); + + String ft = ((ECHORessource)obj).fullText; + if ((ft!=null) && !ft.equals("")){ + eSciDocObj.addFullText(ft); + } + + //TODO braucht man das? 
+ eSciDocObj.addOrigUrlToMPIWGMetaData(((ECHORessource)obj).metaData); + + DCTransformer transf = new DCTransformer(((ECHORessource)obj).metaData); + Element mdDc = transf.getDCFromIndexMeta(); + if (mdDc!=null){ + eSciDocObj.insertDC(mdDc); + } + else { + logger.debug(((ECHORessource)obj).metaData); + HashMap dc = new HashMap(); + dc.put("title", obj.name); // ersatzweise den name aus der echo ressource + + eSciDocObj.insertDC(dc); + } + + //add description to dc metadata + + + HashMap dc = new HashMap(); + dc.put("description", obj.description); + + eSciDocObj.insertDC(dc); + + + Element metaData = transf.getContentOfMetaTag(); + + eSciDocObj.insertMeta(metaData); + + eSciDocObj.addDisplayUrl(((ECHORessource)obj).link); + + eSciDocObj.addIndexMetaUrl(((ECHORessource)obj).metaData); + + eSciDocObj.addArchiveFolderPath(((ECHORessource)obj).archivePath); + + eSciDocObj.addImageFolderPath(((ECHORessource)obj).getImageFolderPath()); + + + } + if (ECHOCollection.class.isInstance(obj)){ + eSciDocObj = new eSciDocXmlObject(obj.pid, + getClass().getResourceAsStream(ECHOCOLLECTION_TEMPLATE_XML)); + + eSciDocObj.addWebUrl(obj.echoUrl); + + eSciDocObj.setContentModel(obj.CMM); + HashMap dc = new HashMap(); + dc.put("title", obj.name); // ersatzweise den name aus der echo ressource + dc.put("description", obj.description); + + + + eSciDocObj.insertDC(dc); + } + return eSciDocObj; + } + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/transformer/Transformer.java Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,11 @@ +package de.mpiwg.itgroup.eSciDoc.transformer; + +import de.mpiwg.itgroup.eSciDoc.echoObjects.ECHOObject; + +import de.mpiwg.itgroup.eSciDoc.utils.eSciDocXmlObject; + +public interface Transformer { + + eSciDocXmlObject transform(ECHOObject obj) throws Exception; + +} diff -r 000000000000 -r c6929e63b0b8 
/**
 * Fixed prefix/URI bindings for the namespaces used in eSciDoc documents,
 * for use with namespace-aware XPath evaluation.
 *
 * Every prefix returned by {@link #getPrefix(String)} resolves back to the
 * same URI through {@link #getNamespaceURI(String)}.
 */
public class EScidocNameSpaceContext implements NamespaceContext {
    public static String escidocItem = "http://www.escidoc.de/schemas/item/0.7";
    public static String MPIWG = "http://www.mpiwg-berlin.mpg.de/ns/mpiwg";
    public static String prop = "http://escidoc.de/core/01/properties/";
    public static String escidocComponents = "http://www.escidoc.de/schemas/components/0.7";
    public static String xlink = "http://www.w3.org/1999/xlink";
    public static String metadataRecords = "http://www.escidoc.de/schemas/metadatarecords/0.4";
    public static final String DC = "http://purl.org/dc/elements/1.1/";
    public static final String container = "http://www.escidoc.de/schemas/container/0.7";
    public static final String item = "http://www.escidoc.de/schemas/item/0.7";
    public static final String srw = "http://www.loc.gov/zing/srw/";
    public static final String version = "http://escidoc.de/core/01/properties/version/";

    /**
     * Resolves a prefix to its namespace URI.
     *
     * @param prefix the prefix to look up
     * @return the bound URI, or {@link XMLConstants#NULL_NS_URI} for an unbound prefix
     * @throws IllegalArgumentException if {@code prefix} is {@code null}
     */
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        if (prefix.equals("escidocItem")) {
            return escidocItem;
        }
        if (prefix.equals("mpiwg")) {
            return MPIWG;
        }
        if (prefix.equals("escidocComponents")) {
            return escidocComponents;
        }
        if (prefix.equals("prop")) {
            return prop;
        }
        if (prefix.equals("xlink")) {
            return xlink;
        }
        if (prefix.equals("metadata-records")) {
            return metadataRecords;
        }
        if (prefix.equals("dc")) {
            return DC;
        }
        if (prefix.equals("container")) {
            return container;
        }
        // The two bindings every NamespaceContext has to provide.
        if (prefix.equals(XMLConstants.XML_NS_PREFIX)) {
            return XMLConstants.XML_NS_URI;
        }
        if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) {
            return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
        }
        return XMLConstants.NULL_NS_URI;
    }

    /**
     * Resolves a namespace URI to the prefix bound to it.
     *
     * @param namespaceURI the URI to look up
     * @return the bound prefix, or {@code null} for an unbound URI
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    public String getPrefix(String namespaceURI) {
        if (namespaceURI == null) {
            throw new IllegalArgumentException("namespaceURI must not be null");
        }
        if (namespaceURI.equals(escidocItem)) {
            return "escidocItem"; // was "escidocITem", which getNamespaceURI cannot resolve
        }
        if (namespaceURI.equals(MPIWG)) {
            return "mpiwg";
        }
        if (namespaceURI.equals(prop)) {
            return "prop";
        }
        if (namespaceURI.equals(escidocComponents)) {
            return "escidocComponents";
        }
        if (namespaceURI.equals(xlink)) {
            return "xlink";
        }
        if (namespaceURI.equals(metadataRecords)) {
            return "metadata-records"; // was "metadata-record", which getNamespaceURI cannot resolve
        }
        if (namespaceURI.equals(DC)) {
            return "dc";
        }
        if (namespaceURI.equals(container)) {
            return "container";
        }
        if (namespaceURI.equals(XMLConstants.XML_NS_URI)) {
            return XMLConstants.XML_NS_PREFIX;
        }
        if (namespaceURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
            return XMLConstants.XMLNS_ATTRIBUTE;
        }
        return null;
    }

    /**
     * Lists all prefixes bound to a namespace URI. In this context a URI has
     * at most one prefix, so the iterator yields zero or one element.
     *
     * @param namespaceURI the URI to look up
     * @return a read-only iterator over the bound prefixes, never {@code null}
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    public Iterator getPrefixes(String namespaceURI) {
        String prefix = getPrefix(namespaceURI);
        if (prefix == null) {
            return java.util.Collections.emptyList().iterator();
        }
        return java.util.Collections.singletonList(prefix).iterator();
    }

}
private XPath xpath; + private String pid; + private DOMBuilder parser; + + public eSciDocXmlObject(String pid, InputStream template) throws Exception { + + SAXBuilder builder = new SAXBuilder(); + + dom = builder.build(template); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); + + Element test = (Element) xpath.selectSingleNode(dom); + + test.setText(pid); + this.pid=pid; + + } + +public eSciDocXmlObject(URL url) throws Exception { + + SAXBuilder builder = new SAXBuilder(); + + dom = builder.build(url); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); + + Element test = (Element) xpath.selectSingleNode(dom); + + this.pid= test.getTextTrim(); + + } + + public eSciDocXmlObject(Element el) throws JDOMException { + + dom = new Document((Element)el.clone()); + + xpath = EScidocTools.getESciDocXpath("//prop:pid"); + + Element test = (Element) xpath.selectSingleNode(dom); + if (test!=null) + this.pid= test.getTextTrim(); +} + + public void insertDC(Element mdDc) throws Exception{ + //NodeList dcList = mdDc.getFirstChild().getChildNodes(); + XPath dcXPath = EScidocTools.getESciDocXpath("//dc-md"); //todo check path to dc in escddoc templates + + Element dcContainer = (Element) dcXPath.selectSingleNode(dom); + + if (dcContainer==null) + throw new Exception(); + + for(Object ct: mdDc.getChildren()) + { + if (Element.class.isInstance(ct)){ + Element e = (Element) ct; + String name = e.getName(); + Element content = new Element(name, EScidocTools.DC); + content.setText(e.getText()); + dcContainer.addContent(content); + } + } + } + + public void insertDC(HashMap dc) throws Exception{ + Set dcList = dc.keySet(); + + XPath dcXPath = EScidocTools.getESciDocXpath("//dc-md"); //todo check path to dc in escddoc templates + + Element dcContainer = (Element) dcXPath.selectSingleNode(dom); + + for (String name : dcList){ + Element newChild = new Element(name,EScidocTools.DC); + newChild.setText(dc.get(name)); + dcContainer.addContent(newChild); + } + + } + + public void 
insertMeta(Element meta) throws Exception{ + if (meta==null) + return; + + XPath xp = EScidocTools.getESciDocXpath("//metadata-records:md-record[@name='mpiwg-index_meta']"); + Element dcContainer = (Element) xp.selectSingleNode(dom); + + dcContainer.addContent((Element)meta.clone()); + + } + + + public void setTitle(String title) throws Exception{ + XPath xp = EScidocTools.getESciDocXpath("//foxml:xmlContent/oai_dc:dc"); + Element dcContainer = (Element) xp.selectSingleNode(dom); + + + + Element newChild = new Element("title",EScidocTools.DC); + newChild.setText(title); + + + dcContainer.addContent(newChild); + + + + + } + + public void setReferenceFolder(String folderpath) throws Exception{ + XPath xp = EScidocTools.getESciDocXpath("//mpiwg:bundle/mpiwg:bundle-root/@path"); + Element container = (Element) xp.selectSingleNode(dom); + + container.setText(folderpath); + + } + + public void setVLPId(String id) throws Exception{ + XPath xp = EScidocTools.getESciDocXpath("//vlp:admin/vlp:identifier"); + Element container = (Element) xp.selectSingleNode(dom); + + container.setText(id); + } + /** + * @param file Pfad mit den Grundordner des Dokument + * @throws Exception +*/ + public void generateXMLIndex(File file) throws Exception{ + + + File[] files = file.listFiles(); + + //first try to find the image path + File imagefolder = null; + for (File f: files){ + + if (f.getName().equals("pageimg")) + imagefolder = f; + else if (f.getName().equals("pages")) + imagefolder = f; + } + if (imagefolder==null) + return; + + FilenameFilter filter = new MyFileNameFilter(); + + ArrayList> tupels = new ArrayList>(); + ArrayList tmp; + for (File image : imagefolder.listFiles(filter)) + { + tmp = new ArrayList(); + + tmp.add(calculateName(image)); + tmp.add(image.getName()); + + tupels.add(tmp); + } + generateXMLIndex(tupels); + } + + private void generateXMLIndex(ArrayList> tupels) throws Exception { + // DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + 
//factory.setNamespaceAware(true); + //DocumentBuilder db =factory.newDocumentBuilder(); + + XPath xp = EScidocTools.getESciDocXpath("//vlp:index"); + Element index = (Element) xp.selectSingleNode(dom); + + + for (ArrayList tuple: tupels ){ + + Element newChild = new Element("page",EScidocTools.VLP); + newChild.setText(tuple.get(0)); + + newChild.setAttribute("nr", tuple.get(1)); + index.addContent(newChild); + + } + + + } + + private String calculateName(File f) + { + String name = f.getName(); + String[] splitted = name.split("[.]"); + String n = splitted[0]; + String name2 = n.substring(1); + char letter = n.charAt(0); + return letter+": "+name2; + }; + + + public void setRelationship(String collection) throws Exception{ + XPath xp = EScidocTools.getESciDocXpath("//foxml:datastream[@ID='RELS-EXT']/foxml:datastreamVersion/foxml:xmlContent"); + Element xmlContent = (Element) xp.selectSingleNode(dom); + + + XPath rdfxp = EScidocTools.getESciDocXpath("//rdf:RDF"); + Element rdf = (Element) rdfxp.selectSingleNode(dom); + + + if (rdf==null) + { + rdf = new Element("RDF",EScidocTools.RDF); + + xmlContent.addContent(rdf); + } + + rdf.setAttribute("rdf", EScidocTools.RDF); + rdf.setAttribute("fedora", EScidocTools.FEDORA); + + Element rdfDescription = new Element("Description",EScidocTools.RDF); + + Namespace ns = Namespace.getNamespace("rdf",EScidocTools.RDF); + rdfDescription.setAttribute("about","info:fedora/"+pid, ns); + + Element rdfMember = new Element("isMemberOfCollection",EScidocTools.FEDORA); + rdfDescription.setAttribute("resource", collection, ns); + + rdfDescription.addContent(rdfMember); + + rdf.addContent(rdfDescription); + + + + + } + public void addWebUrl(String url) throws Exception + { + XPath xp = EScidocTools.getESciDocXpath("//escidocComponents:content[@xlink:title='web_page']"); + Element xmlContent = (Element) xp.selectSingleNode(dom); + + + Namespace ns = Namespace.getNamespace("xlink",EScidocTools.xlink); + + xmlContent.setAttribute("href", 
url,ns); + } + + public void addOrigUrlToMPIWGMetaData(String url) throws Exception + { + XPath xp = EScidocTools.getESciDocXpath("//mpiwg:admin/mpiwg:orig-ref"); + Element xmlContent = (Element) xp.selectSingleNode(dom); + + + Namespace ns = Namespace.getNamespace("xlink",EScidocTools.xlink); + + xmlContent.setAttribute("href", url,ns); + + } + + public void addIndexMetaUrl(String url) throws Exception + { + if (url.equals("")) + return; + + InputStream componentXML=getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml"); + addComponent(url,componentXML,"index_meta","text/xml","internal-managed"); + + + } + + + + public String printXML() throws IOException{ + XMLOutputter out = new XMLOutputter(); + + String string = out.outputString(dom); + return string; + + } + + public void addFullText(String url) throws Exception { + addFullText(url,getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml")); + + } + + public void addFullText(String url, InputStream inputStream) throws Exception { + addComponent(url, inputStream,"text","text/xml","internal-managed"); + } + + + public void addComponent(String url, InputStream inputStream, String category, String mimeType, String storageType) throws JDOMException, IOException{ + + SAXBuilder builder = new SAXBuilder(); + + Document dom2 = builder.build(inputStream); + + + XPath xp = EScidocTools.getESciDocXpath("//escidocComponents:properties/prop:content-category"); + Element item = (Element)xp.selectSingleNode(dom2); + item.setText(category); + + xp = EScidocTools.getESciDocXpath("//escidocComponents:properties/prop:mime-type"); + item = (Element)xp.selectSingleNode(dom2); + item.setText(mimeType); + + xp = EScidocTools.getESciDocXpath("//escidocComponents:content"); + item = (Element)xp.selectSingleNode(dom2); + + Namespace ns = Namespace.getNamespace("xlink",EScidocTools.xlink); + + item.setAttribute("href", url,ns); + item.setAttribute("storage",storageType); + + + 
xp = EScidocTools.getESciDocXpath("//escidocComponents:components"); + item = (Element)xp.selectSingleNode(dom); + + + //Node newNode = dom.importNode(dom2.getFirstChild(), true); + item.addContent((Element)dom2.getRootElement().clone()); + } + + public void addImageFolderPath(String imageFolder) throws Exception { + + XPath xp = EScidocTools.getESciDocXpath("//mpiwg:admin"); + Element item = (Element)xp.selectSingleNode(dom); + + Element images= new Element("imageFolder",EScidocTools.MPIWG); + images.setText(imageFolder); + + item.addContent(images); + + + } + + public void addArchiveFolderPath(String archivePath) throws JDOMException { + XPath xp = EScidocTools.getESciDocXpath("//mpiwg:admin"); + Element item = (Element)xp.selectSingleNode(dom); + + Element images= new Element("archivePath",EScidocTools.MPIWG); + images.setText(archivePath); + + item.addContent(images); + + } + + public Boolean upDateFromXML(String xml) { + SAXBuilder builder = new SAXBuilder(); + + + ByteArrayInputStream in; + try { + in = new ByteArrayInputStream(xml.getBytes("utf-8")); + + dom = builder.build(in); + + } catch (UnsupportedEncodingException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return false; + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return false; + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return false; + } + + return true; + + + + } + + public void addDisplayUrl(String link) throws JDOMException, IOException { + + if (link==null || link.equals("")) + return; + + InputStream componentXML=getClass().getResourceAsStream("/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml"); + addComponent(link,componentXML,"display_url","text/html","external-url"); + + + + + + } + + public String getESciDocId() throws JDOMException { + + Element el = dom.getRootElement(); + + XPath xp = EScidocTools.getESciDocXpath("./@xlink:href"); + Attribute href = (Attribute) 
xp.selectSingleNode(el); + return href.getValue(); + } + + public void setContentModel(String href) throws JDOMException { + + XPath xp = EScidocTools.getESciDocXpath("//escidocItem:properties/srel:content-model/@xlink:href"); + Attribute item = (Attribute)xp.selectSingleNode(dom); + item.setValue(href); + + + } + + public void setContext(String context) throws JDOMException { + XPath xp = EScidocTools.getESciDocXpath("//escidocItem:properties/srel:context/@xlink:href"); + Attribute item = (Attribute)xp.selectSingleNode(dom); + item.setValue(context); + + + } + + public Document getDocument(){ + return dom; + } + + +} diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollectionTemplate.out.xml diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollectionTemplate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollectionTemplate.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,82 @@ + + + + + + + + PID + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + valid + public + web_page + text/xml + + + + + + + + + + diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollection_container.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollection_container.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + Example Container 02 (REST) + + + + + + + + + + + diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollection_content-model.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOCollection_content-model.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,9 @@ + + + + + ECHO collection CM + collections + + + \ No newline at end of file diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOResourceTemplate.out.xml diff 
-r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOResourceTemplate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOResourceTemplate.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,90 @@ + + + + + + + + PID + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + valid + public + web_page + text/xml + + + + + + + + + + diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOResourceTemplate_old.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOResourceTemplate_old.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,143 @@ + + + + + + + + + + PID + + + + 1 + 2007-11-15T08:36:58.765Z + released + + + Status changed to released for Item + escidoc:ex5. + + + + + 1 + 2007-11-15T08:36:58.765Z + + + + + 1 + 2007-11-15T08:36:58.765Z + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + valid + public + web_page + text/xml + + + + + + + + + + valid + public + index_meta + text/xml + + + + + + + diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOResource_content-model.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/ECHOResource_content-model.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,9 @@ + + + + + ECHO resource content model + ECHO resource + + + \ No newline at end of file diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/FoxridgeResource_content-model.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/FoxridgeResource_content-model.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,9 @@ + + + + + Foxridge resource content model + Foxridge resource + + + \ No newline at end of file diff -r 000000000000 -r c6929e63b0b8 src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/de/mpiwg/itgroup/eSciDoc/xmlTemplates/component.xml Wed Nov 24 16:52:07 2010 +0100 @@ -0,0 +1,19 @@ + + + + + valid + public + image + image/jpeg + + + + + \ No newline at end of file