#include "bt_types.h"
Defines | |
#define | BT_RLP_ALTERNATIVE_LEMMAS 34 |
Alternative Lemmas (for Semitic languages). | |
#define | BT_RLP_ALTERNATIVE_NORM 33 |
Alternative Normalized Tokens (for Semitic languages). | |
#define | BT_RLP_ALTERNATIVE_PARTS_OF_SPEECH 37 |
Alternative parts of speech tags. | |
#define | BT_RLP_ALTERNATIVE_ROOTS 35 |
Alternative Roots (for Semitic languages). | |
#define | BT_RLP_ALTERNATIVE_STEMS 36 |
Alternative Stems (for Semitic languages). | |
#define | BT_RLP_BASE_NOUN_PHRASE 3 |
Base Noun Phrases (start token offset, end token offset). | |
#define | BT_RLP_COMPOUND 10 |
Mapping from token indices to vectors of compound components. | |
#define | BT_RLP_DETECTED_ENCODING 5 |
Character encoding of the original data. | |
#define | BT_RLP_DETECTED_LANGUAGE 4 |
The language of the data. | |
#define | BT_RLP_DETECTED_SCRIPT 29 |
The script of the data. | |
#define | BT_RLP_FLAGS 21 |
Not used. | |
#define | BT_RLP_GAZETTEER_NAMES 19 |
Not used. | |
#define | BT_RLP_HTML_DOC 23 |
HTML document. For internal use. | |
#define | BT_RLP_HTML_TABLES 27 |
HTML tables data. For internal use. | |
#define | BT_RLP_HTML_TAGS 22 |
HTML stripped tags data. For internal use. | |
#define | BT_RLP_LANGUAGE_REGION 13 |
Mapping of token indices to language regions identified by their ISO 639 codes. | |
#define | BT_RLP_LEMMA 32 |
Lemmas (for Semitic languages). | |
#define | BT_RLP_MANY_TO_ONE_NORMALIZED_TOKEN 38 |
Many-To-One Normalized tokens. | |
#define | BT_RLP_MAP_OFFSETS 28 |
Map offsets of transformed text to original text. | |
#define | BT_RLP_MIME_TYPE 26 |
MIME Type of original data. | |
#define | BT_RLP_NAMED_ENTITY 6 |
Named Entities (start token offset, end token offset, entity type). | |
#define | BT_RLP_NORMALIZED_TOKEN 9 |
Normalized tokens. | |
#define | BT_RLP_PART_OF_SPEECH 1 |
Part of Speech tags as strings. | |
#define | BT_RLP_PATHNAME 25 |
Pathname of the file the RAW_DATA came from, if ProcessFile was called. | |
#define | BT_RLP_RAW_DATA 24 |
Raw data at the byte level. Not necessarily Unicode. For internal use. | |
#define | BT_RLP_RAW_TEXT 15 |
The text we are processing, in UTF-16. | |
#define | BT_RLP_READING 11 |
Mapping from token indices to vectors of readings. | |
#define | BT_RLP_ROOTS 20 |
Roots (for Semitic languages). | |
#define | BT_RLP_SCRIPT_REGION 12 |
Mapping of token indices to script regions identified by their ISO 15924 codes. | |
#define | BT_RLP_SENTENCE_BOUNDARY 2 |
Sentence boundaries: token offset of one past the end of each sentence. | |
#define | BT_RLP_STEM 8 |
Stems. | |
#define | BT_RLP_STOPWORD 16 |
Token indices of tokens excluded as stopwords. | |
#define | BT_RLP_TEXT_BOUNDARIES 14 |
Mapping of character indices to text boundaries. Used by the language boundary detector. | |
#define | BT_RLP_TOKEN 0 |
The tokens. | |
#define | BT_RLP_TOKEN_OFFSET 7 |
Offsets (start,end) of each token in the raw text. | |
#define | BT_RLP_TOKEN_SOURCE_ID 30 |
The source ID of the token. | |
#define | BT_RLP_TOKEN_SOURCE_NAME 31 |
The source name of the token. | |
#define | BT_RLP_TOKEN_VARIATIONS 18 |
Tokens expanded as variations, such as alternative spellings. | |
#define | BT_RLP_TRANSCRIBED_TEXT 17 |
Not used. | |
Typedefs | |
typedef BT_UInt32 | BT_RLP_ResultType |
Data type for RLP result types. |
typedef BT_UInt32 BT_RLP_ResultType |
These types are used to refer to the result data generated by RLP processing.
Copyright © 2004-2008 Basis Technology Corporation. All Rights Reserved.