view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/Token.java @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents 4a3641ae14d2
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize;

/**
 * A single token of a document together with its location metadata
 * (document, page, line, enclosing element) and several variants of its
 * textual content (original, regularized, normalized, morphological).
 *
 * <p>Instances are mutable; every field has a setter.
 */
public class Token {
  private String docId;
  private String language;
  private int pageNumber;
  private int lineNumber;
  private String elementName; // e.g. "TEI:s"
  private int elementPosition;
  private int elementPagePosition;
  private String xmlId;
  private String xpath;
  private String content; // original text content
  private String contentOrig;  // word form
  private String contentReg;  // regularized text content
  private String contentNorm;  // normalized word form
  private String contentMorph;  // lemmas separated by blank
  private int start; // start position
  private int end;  // end position

  /**
   * Creates a token from a text span. The word form ({@code contentOrig}) is
   * derived from {@code content} via {@link #toWordForm()}.
   *
   * @param start start position of the token
   * @param end end position of the token
   * @param content original text content, may be {@code null}
   */
  public Token(int start, int end, String content) {
    this.start = start;
    this.end = end;
    this.content = content;
    this.contentOrig = toWordForm();
  }

  /**
   * Creates a token from already computed content variants and location
   * metadata. {@code content}, {@code start}, {@code end} and
   * {@code elementPagePosition} are not set by this constructor and keep
   * their default values.
   *
   * @param docId identifier of the document
   * @param language language of the token
   * @param pageNumber page number
   * @param lineNumber line number
   * @param elementPosition position of the enclosing element
   * @param elementName name of the enclosing element, e.g. "TEI:s"
   * @param xmlId XML id
   * @param xpath XPath
   * @param contentOrig word form
   * @param contentReg regularized text content
   * @param contentNorm normalized word form
   * @param contentMorph lemmas separated by blank
   */
  public Token(String docId, String language, int pageNumber, int lineNumber, int elementPosition, String elementName, String xmlId, String xpath, String contentOrig, String contentReg, String contentNorm, String contentMorph) {
    this.docId = docId;
    this.language = language;
    this.pageNumber = pageNumber;
    this.lineNumber = lineNumber;
    this.elementPosition = elementPosition;
    this.elementName = elementName;
    this.xmlId = xmlId;
    this.xpath = xpath;
    this.contentOrig = contentOrig;
    this.contentReg = contentReg;
    this.contentNorm = contentNorm;
    this.contentMorph = contentMorph;
  }

  public int getStart() {
    return start;
  }

  public int getEnd() {
    return end;
  }

  /**
   * Returns the lower-cased {@code content}.
   *
   * @return the lower-cased content, or {@code null} if no content is set
   */
  public String toWordForm() {
    if (content == null) {
      return null;
    }
    // Locale.ROOT keeps the result independent of the JVM default locale
    // (a Turkish default locale would otherwise map "I" to a dotless "i").
    return content.toLowerCase(java.util.Locale.ROOT);
  }

  /**
   * Returns the word form (if any) followed by {@code (start,end)}.
   */
  @Override
  public String toString() {
    StringBuilder result = new StringBuilder();
    if (contentOrig != null) {
      result.append(contentOrig);
    }
    result.append('(').append(start).append(',').append(end).append(')');
    return result.toString();
  }

  /**
   * Serializes this token as a {@code <token>} XML fragment containing
   * docId, language, pageNumber, elementPosition, elementPagePosition,
   * elementName and contentOrig. String values that are {@code null} are
   * omitted; the characters {@code &}, {@code <} and {@code >} in string
   * values are replaced by their predefined XML entities so that the
   * fragment stays well-formed.
   *
   * @return the XML fragment
   */
  public String toXmlString() {
    StringBuilder xml = new StringBuilder();
    xml.append("<token>");
    appendTextElement(xml, "docId", docId);
    appendTextElement(xml, "language", language);
    xml.append("<pageNumber>").append(pageNumber).append("</pageNumber>");
    xml.append("<elementPosition>").append(elementPosition).append("</elementPosition>");
    xml.append("<elementPagePosition>").append(elementPagePosition).append("</elementPagePosition>");
    appendTextElement(xml, "elementName", elementName);
    appendTextElement(xml, "contentOrig", contentOrig);
    xml.append("</token>");
    return xml.toString();
  }

  /** Appends {@code <name>escaped value</name>} unless {@code value} is {@code null}. */
  private static void appendTextElement(StringBuilder xml, String name, String value) {
    if (value == null) {
      return;
    }
    xml.append('<').append(name).append('>');
    appendEscaped(xml, value);
    xml.append("</").append(name).append('>');
  }

  /** Appends {@code text} with the XML markup characters {@code & < >} replaced by entities. */
  private static void appendEscaped(StringBuilder xml, String text) {
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      switch (c) {
        case '&':
          xml.append("&amp;");
          break;
        case '<':
          xml.append("&lt;");
          break;
        case '>':
          xml.append("&gt;");
          break;
        default:
          xml.append(c);
      }
    }
  }

  public String getContent() {
    return content;
  }

  /**
   * Sets the original text content and recomputes the word form
   * ({@code contentOrig}) from it, overwriting any previously set word form.
   *
   * @param content original text content, may be {@code null}
   */
  public void setContent(String content) {
    this.content = content;
    this.contentOrig = toWordForm();
  }

  public String getContentOrig() {
    return contentOrig;
  }

  public void setContentOrig(String contentOrig) {
    this.contentOrig = contentOrig;
  }

  public String getContentReg() {
    return contentReg;
  }

  public void setContentReg(String contentReg) {
    this.contentReg = contentReg;
  }

  public String getContentNorm() {
    return contentNorm;
  }

  public void setContentNorm(String contentNorm) {
    this.contentNorm = contentNorm;
  }

  public String getContentMorph() {
    return contentMorph;
  }

  public void setContentMorph(String contentMorph) {
    this.contentMorph = contentMorph;
  }

  public String getDocId() {
    return docId;
  }

  public void setDocId(String docId) {
    this.docId = docId;
  }

  public String getLanguage() {
    return language;
  }

  public void setLanguage(String language) {
    this.language = language;
  }

  public int getPageNumber() {
    return pageNumber;
  }

  public void setPageNumber(int pageNumber) {
    this.pageNumber = pageNumber;
  }

  public int getLineNumber() {
    return lineNumber;
  }

  public void setLineNumber(int lineNumber) {
    this.lineNumber = lineNumber;
  }

  /**
   * Returns the element position; same value as {@link #getElementPosition()}.
   */
  public int getPosition() {
    return elementPosition;
  }

  /**
   * Returns the element position. Bean-style counterpart of
   * {@link #setElementPosition(int)}.
   */
  public int getElementPosition() {
    return elementPosition;
  }

  public void setElementPosition(int elementPosition) {
    this.elementPosition = elementPosition;
  }

  /**
   * Returns the element page position; same value as
   * {@link #getElementPagePosition()}.
   */
  public int getPagePosition() {
    return elementPagePosition;
  }

  /**
   * Returns the element page position. Bean-style counterpart of
   * {@link #setElementPagePosition(int)}.
   */
  public int getElementPagePosition() {
    return elementPagePosition;
  }

  public void setElementPagePosition(int elementPagePosition) {
    this.elementPagePosition = elementPagePosition;
  }

  public String getElementName() {
    return elementName;
  }

  public void setElementName(String elementName) {
    this.elementName = elementName;
  }

  public String getXmlId() {
    return xmlId;
  }

  public void setXmlId(String xmlId) {
    this.xmlId = xmlId;
  }

  public String getXpath() {
    return xpath;
  }

  public void setXpath(String xpath) {
    this.xpath = xpath;
  }

  public void setStart(int start) {
    this.start = start;
  }

  public void setEnd(int end) {
    this.end = end;
  }
}