view software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetLemmas.java @ 25:e9fe3186670c default tip

letzter Stand eingecheckt
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 21 May 2013 10:19:32 +0200
parents
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.servlets.lt;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;

import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler;
import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.Normalizer;

public class GetLemmas extends HttpServlet {
  private static final long serialVersionUID = 1L;
  private LexHandler lexHandler;

  public GetLemmas() {
    super();
  }

  public void init(ServletConfig config) throws ServletException  {
    super.init(config);
    try {
      lexHandler = LexHandler.getInstance();
    } catch (ApplicationException e) {
      throw new ServletException(e);
    }
  }

  protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    Date begin = new Date();
    request.setCharacterEncoding("utf-8");
    response.setCharacterEncoding("utf-8");
    String query = request.getParameter("query");
    String language = request.getParameter("language");
    String inputType = request.getParameter("inputType");
    String outputFormat = request.getParameter("outputFormat");
    String outputType = request.getParameter("outputType");
    String normalization = request.getParameter("normalization");
    if (language == null)
      language = "eng";
    if (inputType == null || ! (inputType.equals("form") || inputType.equals("lemma")))
      inputType = "form";
    if (outputFormat == null || ! (outputFormat.equals("xml") || outputFormat.equals("html") || outputFormat.equals("string")))
      outputFormat = "xml";
    if (outputType == null || ! (outputType.equals("compact") || outputType.equals("full")))
      outputType = "compact";
    if (normalization == null || ! (normalization.equals("none") || normalization.equals("reg") || normalization.equals("reg norm")))
      normalization = "norm";
    int normalizationType = Normalizer.DICTIONARY;
    if (normalization != null && normalization.equals("none"))
      normalizationType = Normalizer.NONE;

    String xmlQueryString = "<query><name>" + query + "</name>" + "<language>" + language + "</language>" + "<inputType>" + inputType + "</inputType>" + 
      "<outputFormat>" + outputFormat + "</outputFormat>" + "<outputType>" + outputType + "</outputType>" + "<normalization>" + normalization + "</normalization>" + "</query>";
    try {
      if (outputFormat.equals("xml"))
        response.setContentType("text/xml");
      else if (outputFormat.equals("html") || outputFormat.equals("string"))
        response.setContentType("text/html");
      else 
        response.setContentType("text/xml");
      PrintWriter out = response.getWriter();
      if (query == null || query.isEmpty()) {
        out.print("request parameter query is empty. Please specify a query.");
        out.close();
        return;
      }
      ArrayList<Lemma> lemmas = lexHandler.getLemmas(query, inputType, language, normalizationType, true);
      String baseUrl = getBaseUrl(request);
      Date end = new Date();
      String elapsedTime = String.valueOf(end.getTime() - begin.getTime());
      String result = "";
      if (outputFormat == null || outputFormat.equals("xml"))
        result = createXmlOutputString(query, lemmas, outputType, baseUrl, xmlQueryString, elapsedTime);
      else if (outputFormat.equals("html"))
        result = createHtmlOutputString(query, lemmas, outputType, elapsedTime);
      else if (outputFormat.equals("string"))
        result = createStringOutputString(lemmas);
      else 
        result = createXmlOutputString(query, lemmas, outputType, baseUrl, xmlQueryString, elapsedTime);
      out.print(result);
      out.close();
    } catch (ApplicationException e) { 
      throw new ServletException(e);
    }
  }

  protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    doGet(request, response);
  }  
  
  private String getBaseUrl( HttpServletRequest request ) {
    if (request.getServerPort() == 80 || request.getServerPort() == 443)
      return request.getScheme() + "://" + request.getServerName() + request.getContextPath();
    else
      return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort() + request.getContextPath();
  }

  
  private String createXmlOutputString(String query, ArrayList<Lemma> lemmas, String outputType, String baseUrl, String xmlQueryString, String elapsedTime) {
    String result = "<result>";
    result = result + "<provider>" + "MPIWG MPDL language technology service (see: " + "" + baseUrl + "), Max Planck Institute for the History of Science, Berlin." + "</provider>";
    result = result + xmlQueryString;
    result = result + "<elapsed-time-ms>" + elapsedTime + "</elapsed-time-ms>";
    if (lemmas != null && ! lemmas.isEmpty()) {
      result = result + "<morphology>";
      for (int i=0; i<lemmas.size(); i++) {
        Lemma lemma = lemmas.get(i);
        String lemmaName = lemma.getLemmaName();
        String language = lemma.getLanguage();
        result = result + "<lemma>";
        result = result + "<name>" + lemmaName + "</name>";
        if (outputType != null && outputType.equals("full")) {
          String lemmaProvider = lemma.getProvider();
          result = result + "<provider>" + lemmaProvider + "</provider>";
          result = result + "<language>" + language + "</language>";
        }
        if (Language.getInstance().isArabic(language) || Language.getInstance().isLatin(language)) {
          String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&amp;la=" + language;
          result = result + "<remoteUrl>" + remoteUrl + "</remoteUrl>";
        } else if (Language.getInstance().isGreek(language)) {
          String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&amp;la=" + "greek";
          result = result + "<remoteUrl>" + remoteUrl + "</remoteUrl>";
        }
        if (outputType != null && outputType.equals("full")) {
          ArrayList<Form> forms = lemma.getFormsList();
          Collections.sort(forms);
          if (forms != null && ! forms.isEmpty()) {
            result = result + "<forms>";
            for (int j=0; j<forms.size(); j++) {
              result = result + "<form>";
              Form f = forms.get(j);
              String formName = f.getFormName();
              String formProvider = f.getProvider();
              result = result + "<provider>" + formProvider + "</provider>";
              result = result + "<language>" + language + "</language>";
              result = result + "<name>" + formName + "</name>";
              result = result + "</form>";
            }
            result = result + "</forms>";
          }
        }
        result = result + "</lemma>";
      }
      result = result + "</morphology>";
    }
    result = result + "</result>";
    return result;
  }
  
  private String createHtmlOutputString(String query, ArrayList<Lemma> lemmas, String outputType, String elapsedTime) {
    String result = "";
    result = result + "<html>";
    result = result + "<head>";
    result = result + "<title>Lemmas for: \"" + query + "\"</title>";
    result = result + "</head>";
    result = result + "<body>";
    result = result + "<table align=\"right\" valign=\"top\">";
    result = result + "<td>[<i>This is a MPIWG language technology service</i>] <a href=\"/mpiwg-mpdl-cms-web/lt-services.html\"><img src=\"/mpiwg-mpdl-cms-web/images/info.png\" valign=\"bottom\" width=\"15\" height=\"15\" border=\"0\" alt=\"MPIWG language technology service\"/></a></td>";
    result = result + "</table>";
    result = result + "<p/>";
    result = result + "<h1>Lemmas for: \"" + query + "\"</h1>";
    if (lemmas != null && ! lemmas.isEmpty()) {
      result = result + "<h3>Morphology</h3>";
      result = result + "<ul>";
      result = result + "<p/>";
      for (int i=0; i<lemmas.size(); i++) {
        Lemma lemma = lemmas.get(i);
        String lemmaName = lemma.getLemmaName();
        String language = lemma.getLanguage();
        result = result + "<li>";
        result = result + lemmaName;
        if (outputType != null && outputType.equals("full")) {
          String lemmaProvider = lemma.getProvider();
          result = result + " (data provider: " + lemmaProvider + ")";
        }
        if (Language.getInstance().isArabic(language) || Language.getInstance().isLatin(language))
          result = result + " (external link: <a href=\"http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&amp;la=" + language + "\">" + lemmaName + "</a>)";
        else if (Language.getInstance().isGreek(language))
          result = result + " (external link: <a href=\"http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&amp;la=" + "greek" + "\">" + lemmaName + "</a>)";
        if (outputType != null && outputType.equals("full")) {
          ArrayList<Form> forms = lemma.getFormsList();
          Collections.sort(forms);
          if (forms != null && ! forms.isEmpty()) {
            result = result + "<ul>";
            for (int j=0; j<forms.size(); j++) {
              Form f = forms.get(j);
              String formName = f.getFormName();
              String formProvider = f.getProvider();
              result = result + formName + " (data provider: " + formProvider + "), ";
            }
            result = result.substring(0, result.length() - 2);  // without last comma and blank
            result = result + "</ul>";
          }
        }
        result = result + "</li>";
      }
      result = result + "</ul>";
    }
    result = result + "[* external links may not function]";
    result = result + "<hr/>";
    result = result + "<p/>";
    result = result + "Elapsed time: " + elapsedTime + " ms, see the <a href=\"/mpiwg-mpdl-lt-web/index.html\">service description</a> of this page, if you find a bug <a href=\"https://it-dev.mpiwg-berlin.mpg.de/tracs/mpdl-project-software/newticket\">let us know</a>";
    result = result + "</body>";
    result = result + "</html>";
    return result;
  }

  private String createStringOutputString(ArrayList<Lemma> lemmas) {
    String result = "";
    for (int i=0; i<lemmas.size(); i++) {
      Lemma l = lemmas.get(i);
      String lemmaName = l.getLemmaName();
      result = result + lemmaName + " ";
    }
    result = result.substring(0, result.length() - 1);  // without last blank
    return result;
  }
    
  
}