Mercurial > hg > mpdl-group
view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/Token.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | 4a3641ae14d2 |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize; public class Token { private String docId; private String language; private int pageNumber; private int lineNumber; private String elementName; // e.g. "TEI:s" private int elementPosition; private int elementPagePosition; private String xmlId; private String xpath; private String content; // original text content private String contentOrig; // word form private String contentReg; // regularized text content private String contentNorm; // normalized word form private String contentMorph; // lemmas separated by blank private int start; // start position private int end; // end position public Token(int start, int end, String content) { this.start = start; this.end = end; this.content = content; this.contentOrig = toWordForm(); } public Token(String docId, String language, int pageNumber, int lineNumber, int elementPosition, String elementName, String xmlId, String xpath, String contentOrig, String contentReg, String contentNorm, String contentMorph) { this.docId = docId; this.language = language; this.pageNumber = pageNumber; this.lineNumber = lineNumber; this.elementPosition = elementPosition; this.elementName = elementName; this.xmlId = xmlId; this.xpath = xpath; this.contentOrig = contentOrig; this.contentReg = contentReg; this.contentNorm = contentNorm; this.contentMorph = contentMorph; } public int getStart() { return start; } public int getEnd() { return end; } public String toWordForm() { if (content != null) return content.toLowerCase(); else return null; } public String toString() { String retStr = ""; if (contentOrig != null) retStr += contentOrig; retStr = retStr + "(" + start + "," + end + ")"; return retStr; } public String toXmlString() { StringBuilder retStr = new StringBuilder(); retStr.append("<token>"); if (docId != null) retStr.append("<docId>" + docId + "</docId>"); if (language != null) retStr.append("<language>" + language + "</language>"); retStr.append("<pageNumber>" + pageNumber + "</pageNumber>"); retStr.append("<elementPosition>" + elementPosition + "</elementPosition>"); retStr.append("<elementPagePosition>" + elementPagePosition + "</elementPagePosition>"); if (elementName != null) retStr.append("<elementName>" + elementName + "</elementName>"); if (contentOrig != null) retStr.append("<contentOrig>" + contentOrig + "</contentOrig>"); retStr.append("</token>"); return retStr.toString(); } public String getContent() { return content; } public void setContent(String content) { this.content = content; this.contentOrig = toWordForm(); } public String getContentOrig() { return contentOrig; } public void setContentOrig(String contentOrig) { this.contentOrig = contentOrig; } public String getContentReg() { return contentReg; } public void setContentReg(String contentReg) { this.contentReg = contentReg; } public String getContentNorm() { return contentNorm; } public void setContentNorm(String contentNorm) { this.contentNorm = contentNorm; } public String getContentMorph() { return contentMorph; } public void setContentMorph(String contentMorph) { this.contentMorph = contentMorph; } public String getDocId() { return docId; } public void setDocId(String docId) { this.docId = docId; } public String getLanguage() { return language; } public void setLanguage(String language) { this.language = language; } public int getPageNumber() { return pageNumber; } public void setPageNumber(int pageNumber) { this.pageNumber = pageNumber; } public int getLineNumber() { return lineNumber; } public void setLineNumber(int lineNumber) { this.lineNumber = lineNumber; } public int getPosition() { return elementPosition; } public void setElementPosition(int elementPosition) { this.elementPosition = elementPosition; } public int getPagePosition() { return elementPagePosition; } public void setElementPagePosition(int elementPagePosition) { this.elementPagePosition = elementPagePosition; } public String getElementName() { return elementName; } public void setElementName(String elementName) { this.elementName = elementName; } public String getXmlId() { return xmlId; } public void setXmlId(String xmlId) { this.xmlId = xmlId; } public String getXpath() { return xpath; } public void setXpath(String xpath) { this.xpath = xpath; } public void setStart(int start) { this.start = start; } public void setEnd(int end) { this.end = end; } }