#include <bt_rlp_token_iterator.h>
Public Member Functions | |
virtual void | Destroy ()=0 |
Destroys the iterator. | |
virtual BT_Char16 const * | GetCompoundComponent (BT_UInt32 index) const =0 |
Returns a compound component. | |
virtual BT_Char16 const * | GetDictionaryForm () const =0 |
Returns the dictionary form (stem) of the token. | |
virtual BT_UInt32 | GetEndOffset () const =0 |
Returns the end offset + 1 for the token in the UTF-16 text. | |
virtual BT_UInt32 | GetIndex () const =0 |
Returns the index of the token in the stream of tokens. | |
virtual BT_Char16 const * | GetLemmaForm () const =0 |
Returns the dictionary form (lemma) of the token. | |
virtual BT_Char16 const * | GetManyToOneNormalForm () const =0 |
Returns the many-to-one normalized form of the token. | |
virtual BT_Char16 const * | GetNormalForm () const =0 |
Returns the normalized form of the token. | |
virtual BT_UInt32 | GetNumberOfAnalyses () const =0 |
Returns the number of analyses for the token. | |
virtual BT_UInt32 | GetNumberOfCompoundComponents () const =0 |
Returns the number of compound components for the token. | |
virtual BT_UInt32 | GetNumberOfReadings () const =0 |
Returns the number of readings for the token. | |
virtual char const * | GetPartOfSpeech () const =0 |
Returns the part-of-speech tag for the token. | |
virtual BT_Char16 const * | GetReading (BT_UInt32 index) const =0 |
Returns a reading. | |
virtual BT_Char16 const * | GetRootForm () const =0 |
Returns the dictionary form (root) of the token. | |
virtual BT_UInt32 | GetSourceId () const =0 |
Returns the source name ID of the token. | |
virtual BT_Char16 const * | GetSourceName (BT_UInt32 id) const =0 |
Returns the source name of the token. | |
virtual BT_UInt32 | GetStartOffset () const =0 |
Returns the start offset for the token in the UTF-16 text. | |
virtual BT_Char16 const * | GetStemForm () const =0 |
Returns the dictionary form (stem) of the token. | |
virtual BT_Char16 const * | GetToken () const =0 |
Returns the token as a string. | |
virtual bool | IsStopword () const =0 |
Returns whether the token is marked as a stopword. | |
virtual bool | Next ()=0 |
Advances to the next token. | |
virtual bool | NextAnalysis ()=0 |
Advances to the next analysis. | |
virtual BT_UInt32 | Size () const =0 |
Returns the total number of tokens. |
Create these iterators by using the BT_RLP_TokenIteratorFactory class. Destroy them via their 'Destroy' methods.
Caution: In the case of stems and normalized tokens, if the result vector does not exist, the iterator returns 0 for each corresponding token; if the result vector exists but is empty, the iterator returns an empty string for the corresponding token.
virtual void BT_RLP_TokenIterator::Destroy | ( | ) | [pure virtual] |
Deletes the iterator.
virtual BT_Char16 const* BT_RLP_TokenIterator::GetCompoundComponent | ( | BT_UInt32 | index | ) | const [pure virtual] |
Returns a pointer to a compound component.
index | specifies the component to return. |
virtual BT_Char16 const* BT_RLP_TokenIterator::GetDictionaryForm | ( | ) | const [pure virtual] |
Returns a pointer to the data from BT_RLP_STEM.
virtual BT_UInt32 BT_RLP_TokenIterator::GetEndOffset | ( | ) | const [pure virtual] |
Returns the end offset + 1 for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET).
virtual BT_UInt32 BT_RLP_TokenIterator::GetIndex | ( | ) | const [pure virtual] |
Returns the index of the token in the stream of tokens (BT_RLP_TOKEN).
virtual BT_Char16 const* BT_RLP_TokenIterator::GetLemmaForm | ( | ) | const [pure virtual] |
Returns a pointer to the data from BT_RLP_LEMMA (for tokens) or BT_RLP_ALTERNATIVE_LEMMAS (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.
virtual BT_Char16 const* BT_RLP_TokenIterator::GetManyToOneNormalForm | ( | ) | const [pure virtual] |
Returns a pointer to the data from BT_RLP_MANY_TO_ONE_NORMALIZED_TOKEN. May be an empty string; 0 if this result type is not returned for the language being processed.
virtual BT_Char16 const* BT_RLP_TokenIterator::GetNormalForm | ( | ) | const [pure virtual] |
Returns a pointer to the data from BT_RLP_NORMALIZED_TOKEN (for tokens) or BT_RLP_ALTERNATIVE_NORM (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.
virtual BT_UInt32 BT_RLP_TokenIterator::GetNumberOfAnalyses | ( | ) | const [pure virtual] |
Returns the number of analyses for the token.
virtual BT_UInt32 BT_RLP_TokenIterator::GetNumberOfCompoundComponents | ( | ) | const [pure virtual] |
Returns the number of compound components for the token (BT_RLP_COMPOUND).
virtual BT_UInt32 BT_RLP_TokenIterator::GetNumberOfReadings | ( | ) | const [pure virtual] |
Returns the number of readings for the token (BT_RLP_READING).
virtual char const* BT_RLP_TokenIterator::GetPartOfSpeech | ( | ) | const [pure virtual] |
Returns the part-of-speech tag for the token (BT_RLP_PART_OF_SPEECH), or analysis (BT_RLP_ALTERNATIVE_PARTS_OF_SPEECH).
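virtual BT_Char16 const* BT_RLP_TokenIterator::GetReading | ( | BT_UInt32 | index | ) | const [pure virtual] |
Returns a pointer to a reading.
index | specifies the reading to return. |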
virtual BT_Char16 const* BT_RLP_TokenIterator::GetRootForm | ( | ) | const [pure virtual] |
Returns a pointer to the data from BT_RLP_ROOTS (for tokens) or BT_RLP_ALTERNATIVE_ROOTS (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.
virtual BT_UInt32 BT_RLP_TokenIterator::GetSourceId | ( | ) | const [pure virtual] |
Returns the source name ID of the token (BT_RLP_TOKEN_SOURCE_ID).
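virtual BT_Char16 const* BT_RLP_TokenIterator::GetSourceName | ( | BT_UInt32 | id | ) | const [pure virtual] |
Returns a pointer to the data from BT_RLP_TOKEN_SOURCE_NAME.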
id | specifies the ID of the source name. |
virtual BT_UInt32 BT_RLP_TokenIterator::GetStartOffset | ( | ) | const [pure virtual] |
Returns the start offset for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET).
virtual BT_Char16 const* BT_RLP_TokenIterator::GetStemForm | ( | ) | const [pure virtual] |
Returns a pointer to the data from BT_RLP_STEM (for tokens) or BT_RLP_ALTERNATIVE_STEMS (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.
virtual BT_Char16 const* BT_RLP_TokenIterator::GetToken | ( | ) | const [pure virtual] |
Returns a pointer to the data from BT_RLP_TOKEN.
virtual bool BT_RLP_TokenIterator::IsStopword | ( | ) | const [pure virtual] |
Returns whether the token is marked as a stopword (BT_RLP_STOPWORD).
virtual bool BT_RLP_TokenIterator::Next | ( | ) | [pure virtual] |
Advances to the next token.
virtual bool BT_RLP_TokenIterator::NextAnalysis | ( | ) | [pure virtual] |
Advances to the next analysis.
virtual BT_UInt32 BT_RLP_TokenIterator::Size | ( | ) | const [pure virtual] |
Returns the total number of tokens.
Copyright © 2004-2008 Basis Technology Corporation. All Rights Reserved.