#include <bt_types.h>
#include <bt_language_names.h>
#include <bt_ne_types.h>
#include <bt_rlp_result_codes.h>
#include <bt_rlp_result_types.h>
#include <bt_rlp_license_types.h>
#include <bt_rlp_internal.h>
#include <bt_rlp_version.h>
#include <bt_log.h>
Defines | |
#define | BT_RLP_CLIBRARY_INTERFACE_VERSION 1 |
C API version number. | |
#define | BT_RLP_NE_Iterator_GetEndOffset BT_RLP_NE_Iterator_GetEndTokenIndex |
Old name for compatibility. | |
#define | BT_RLP_NE_Iterator_GetStartOffset BT_RLP_NE_Iterator_GetStartTokenIndex |
Old name for compatibility. | |
Typedefs | |
typedef struct BT_RLP_ContextC | BT_RLP_ContextC |
Context for performing linguistic analysis. | |
typedef struct BT_RLP_EnvironmentC | BT_RLP_EnvironmentC |
Runtime environment for the Rosette Linguistic Platform (RLP). | |
typedef struct BT_RLP_NE_IteratorC | BT_RLP_NE_IteratorC |
Named Entity Iterator. | |
typedef struct BT_RLP_NE_IteratorFactoryC | BT_RLP_NE_IteratorFactoryC |
RLP Named Entity Iterator Factory. | |
typedef struct BT_RLP_Result_UTF16StringVectorC | BT_RLP_Result_UTF16StringVectorC |
A vector of UTF-16 strings returned as a result item. | |
typedef struct BT_RLP_ResultC | BT_RLP_ResultC |
An individual result from the linguistic analysis. | |
typedef struct BT_RLP_ResultIteratorC | BT_RLP_ResultIteratorC |
RLP Result Iterator. | |
typedef struct BT_RLP_TokenIteratorC | BT_RLP_TokenIteratorC |
Token Iterator. | |
typedef struct BT_RLP_TokenIteratorFactoryC | BT_RLP_TokenIteratorFactoryC |
RLP Token Iterator Factory. | |
Functions | |
bool | BT_RLP_CLibrary_VersionIsCompatible (unsigned long vers) |
Check RLP C Library compatibility. | |
unsigned long | BT_RLP_CLibrary_VersionNumber () |
Retrieves the C API version number as an integer. | |
const char * | BT_RLP_CLibrary_VersionString () |
C API version number as a string. | |
void | BT_RLP_Context_DestroyResultIterator (const BT_RLP_ContextC *contextp, BT_RLP_ResultIteratorC *resitp) |
Destroys a result iterator created by GetResultIterator. | |
void | BT_RLP_Context_DestroyResultStorage (BT_RLP_ContextC *contextp) |
Frees results storage in a context. | |
BT_UInt32 | BT_RLP_Context_GetIntegerResult (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type) |
Gets a result stored as a 32-bit integer for a particular result type. | |
void | BT_RLP_Context_GetProcessorReport (const BT_RLP_ContextC *contextp, char *buffer, BT_UInt32 buffer_size) |
Gets a report on the processors defined in an environment. | |
bool | BT_RLP_Context_GetPropertyValue (const BT_RLP_ContextC *contextp, const char *property_name, char *output, unsigned max_output) |
Gets string property for communication with processors. | |
BT_RLP_ResultIteratorC * | BT_RLP_Context_GetResultIterator (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type) |
Gets a result iterator for a particular result type. | |
const BT_Char8 * | BT_RLP_Context_GetStringResult (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type) |
Gets a result stored as an ASCII string. | |
const BT_UInt32 * | BT_RLP_Context_GetUnsignedIntegerArrayResults (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type, BT_UInt32 *countp) |
Gets a pointer to the data for a particular result type as a vector of unsigned integers. | |
const BT_Char16 * | BT_RLP_Context_GetUTF16StringResult (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type, BT_UInt32 *reslenp) |
Gets a result stored as a UTF-16 string. | |
BT_Result | BT_RLP_Context_ProcessBuffer (BT_RLP_ContextC *contextp, const unsigned char *inbuf, BT_UInt32 inlen, BT_LanguageID lid, const char *character_encoding, const char *mime_type) |
Processes a text buffer within the context. | |
BT_Result | BT_RLP_Context_ProcessFile (BT_RLP_ContextC *contextp, const char *infile, BT_LanguageID lid, const char *character_encoding, const char *mime_type) |
Processes a file within the context. | |
BT_Result | BT_RLP_Context_ProcessUTF16Buffer (BT_RLP_ContextC *contextp, const BT_Char16 *inbuf, BT_UInt32 inlen, BT_LanguageID lid) |
Processes a buffer of text in UTF-16 encoding. | |
void | BT_RLP_Context_SetPropertyValue (BT_RLP_ContextC *contextp, const char *property_name, const char *property_value) |
Sets a string property for communication with processors. | |
BT_RLP_EnvironmentC * | BT_RLP_Environment_Create (void) |
Creates a new RLP environment. | |
void | BT_RLP_Environment_Destroy (BT_RLP_EnvironmentC *envp) |
Deletes the RLP Environment. | |
void | BT_RLP_Environment_DestroyContext (BT_RLP_EnvironmentC *envp, BT_RLP_ContextC *contextp) |
Destroys a context created with GetContextFromFile or GetContextFromBuffer. | |
BT_Result | BT_RLP_Environment_GetContextFromBuffer (BT_RLP_EnvironmentC *envp, const unsigned char *contextspec, BT_UInt32 len, BT_RLP_ContextC **contextpp) |
Creates a context within the current environment, initialized from a buffer. | |
BT_Result | BT_RLP_Environment_GetContextFromFile (BT_RLP_EnvironmentC *envp, const char *context_path, BT_RLP_ContextC **contextpp) |
Creates a context using the current environment, initialized from a file. | |
void | BT_RLP_Environment_GetProcessorReport (const BT_RLP_EnvironmentC *envp, char *buffer, BT_UInt32 buffer_size) |
Get a report on the processors defined in an environment. | |
bool | BT_RLP_Environment_HasLicenseForLanguage (const BT_RLP_EnvironmentC *envp, BT_LanguageID lid, BT_UInt32 functionality) |
Determines if a license exists for the language and functionality. | |
bool | BT_RLP_Environment_HasLicenseForNamedFeature (const BT_RLP_EnvironmentC *envp, const char *feature, BT_UInt32 functionality) |
Determines if a license exists for the feature and functionality. | |
bool | BT_RLP_Environment_HasLicenses (const BT_RLP_EnvironmentC *envp) |
Determines if there are language and feature licenses in the environment. | |
BT_Result | BT_RLP_Environment_InitializeFromBuffer (BT_RLP_EnvironmentC *envp, const unsigned char *buffer, BT_UInt32 len) |
Initializes an environment from a buffer containing configuration information. | |
BT_Result | BT_RLP_Environment_InitializeFromFile (BT_RLP_EnvironmentC *envp, const char *pathname) |
Initializes an environment from a configuration file. | |
const char * | BT_RLP_Environment_RootDirectory (void) |
Queries the BT_ROOT/rlp directory. | |
void | BT_RLP_Environment_SetBTRootDirectory (const char *root_directory_pathname) |
Sets the BT_ROOT directory, globally. | |
void | BT_RLP_Environment_SetLogCallbackFunction (void *info_p, BT_Log_callback_function fcn_p) |
Sets a log callback function. | |
void | BT_RLP_Environment_SetLogLevel (const char *log_level_string) |
Sets the logging control for RLP. | |
bool | BT_RLP_Library_VersionIsCompatible (unsigned long vers) |
C++ library compatibility check. | |
unsigned long | BT_RLP_Library_VersionNumber (void) |
C++ library version number. | |
const char * | BT_RLP_Library_VersionString (void) |
C++ library version number as C string. | |
void | BT_RLP_NE_Iterator_Destroy (BT_RLP_NE_IteratorC *neitp) |
Destroys the Named Entity Iterator. | |
BT_UInt32 | BT_RLP_NE_Iterator_GetEndCharacterOffset (const BT_RLP_NE_IteratorC *neitp) |
Returns the character offset + 1 for the last character in the named entity. | |
BT_UInt32 | BT_RLP_NE_Iterator_GetEndTokenIndex (const BT_RLP_NE_IteratorC *neitp) |
Returns the token index + 1 of the last token in the named entity. | |
BT_UInt32 | BT_RLP_NE_Iterator_GetIndex (const BT_RLP_NE_IteratorC *neitp) |
Returns the index of the named entity in the vector of named entities. | |
const BT_Char16 * | BT_RLP_NE_Iterator_GetNamedEntity (const BT_RLP_NE_IteratorC *neitp) |
Returns normalized text of the current Named Entity. | |
const BT_Char16 * | BT_RLP_NE_Iterator_GetRawNamedEntity (const BT_RLP_NE_IteratorC *neitp) |
Retrieves NE as it occurred in the text. | |
BT_UInt32 | BT_RLP_NE_Iterator_GetStartCharacterOffset (const BT_RLP_NE_IteratorC *neitp) |
Returns the character offset for the first character in the named entity. | |
BT_UInt32 | BT_RLP_NE_Iterator_GetStartTokenIndex (const BT_RLP_NE_IteratorC *neitp) |
Returns the token index of the first token in the named entity. | |
BT_UInt32 | BT_RLP_NE_Iterator_GetType (const BT_RLP_NE_IteratorC *neitp) |
Returns NE type. | |
bool | BT_RLP_NE_Iterator_Next (BT_RLP_NE_IteratorC *neitp) |
Advances to the next named entity. | |
BT_UInt32 | BT_RLP_NE_Iterator_Size (const BT_RLP_NE_IteratorC *neitp) |
Returns the total number of named entities. | |
BT_RLP_NE_IteratorFactoryC * | BT_RLP_NE_IteratorFactory_Create () |
Create a NE iterator factory. | |
BT_RLP_NE_IteratorC * | BT_RLP_NE_IteratorFactory_CreateIterator (BT_RLP_NE_IteratorFactoryC *nifp, BT_RLP_ContextC *contextp) |
Returns a new Named Entity iterator. | |
void | BT_RLP_NE_IteratorFactory_Destroy (BT_RLP_NE_IteratorFactoryC *nifp) |
Destroys a NE iterator factory. | |
bool | BT_RLP_NE_IteratorFactory_GetConsistentType (const BT_RLP_NE_IteratorFactoryC *nifp) |
Gets the flag indicating whether named entity type should be consistent. | |
bool | BT_RLP_NE_IteratorFactory_GetStripAffixes (const BT_RLP_NE_IteratorFactoryC *nifp) |
Reports the current affix handling. | |
void | BT_RLP_NE_IteratorFactory_SetConsistentType (BT_RLP_NE_IteratorFactoryC *nifp, bool flag) |
Sets the flag indicating whether the iterator should return consistent named entity type. | |
void | BT_RLP_NE_IteratorFactory_SetStripAffixes (BT_RLP_NE_IteratorFactoryC *nifp, bool strip) |
Sets affix handling. | |
const BT_Char16 * | BT_RLP_Result_AsCountedUTF16String (const BT_RLP_ResultC *resp, BT_UInt32 *lenp) |
Returns a counted UTF-16 string. | |
void | BT_RLP_Result_AsIntegerPair (const BT_RLP_ResultC *resp, BT_UInt32 *ap, BT_UInt32 *bp) |
Returns a pair of unsigned 32-bit integers. | |
void | BT_RLP_Result_AsIntegerTriple (const BT_RLP_ResultC *resp, BT_UInt32 *ap, BT_UInt32 *bp, BT_UInt32 *cp) |
Returns a triple of unsigned 32-bit integers. | |
void | BT_RLP_Result_AsIntegerUTF16StringVectorPair (const BT_RLP_ResultC *resp, BT_UInt32 *indexp, const BT_RLP_Result_UTF16StringVectorC **stringspp) |
Returns a pair consisting of an integer (token) index and a vector of UTF-16 Strings. | |
BT_Int32 | BT_RLP_Result_AsSignedInteger (const BT_RLP_ResultC *resp) |
Returns a signed 32-bit integer. | |
const BT_Char8 * | BT_RLP_Result_AsString (const BT_RLP_ResultC *resp) |
Returns a null-terminated 8-bit string. | |
BT_UInt32 | BT_RLP_Result_AsUnsignedInteger (const BT_RLP_ResultC *resp) |
Returns an unsigned 32-bit integer. | |
void | BT_RLP_Result_AsUnsignedIntegerVector (const BT_RLP_ResultC *resp, BT_UInt32 *vector, BT_UInt32 size) |
Returns a vector of unsigned 32-bit integers. | |
const BT_Char16 * | BT_RLP_Result_AsUTF16String (const BT_RLP_ResultC *resp) |
Returns a null-terminated UTF-16 encoded Unicode string. | |
const BT_Char16 * | BT_RLP_Result_UTF16StringVector_Get (const BT_RLP_Result_UTF16StringVectorC *u16vec, BT_UInt32 index) |
Retrieves a UTF-16 string. | |
BT_UInt32 | BT_RLP_Result_UTF16StringVector_Size (const BT_RLP_Result_UTF16StringVectorC *u16vec) |
Examines the size of the UTF-16 string vector. | |
const BT_RLP_ResultC * | BT_RLP_ResultIterator_At (BT_RLP_ResultIteratorC *resitp, size_t index) |
Returns the result at index from the set. | |
const BT_RLP_ResultC * | BT_RLP_ResultIterator_Next (BT_RLP_ResultIteratorC *resitp) |
Returns the next result in the set. | |
void | BT_RLP_ResultIterator_Reset (BT_RLP_ResultIteratorC *resitp) |
Resets the iterator to the beginning. | |
BT_UInt32 | BT_RLP_ResultIterator_Size (const BT_RLP_ResultIteratorC *resitp) |
Returns the number of results available. | |
BT_RLP_ResultType | BT_RLP_ResultIterator_Type (const BT_RLP_ResultIteratorC *resitp) |
Returns the type of results returned by the iterator. | |
void | BT_RLP_TokenIterator_Destroy (BT_RLP_TokenIteratorC *tkitp) |
Destroys a token iterator. | |
const BT_Char16 * | BT_RLP_TokenIterator_GetCompoundComponent (const BT_RLP_TokenIteratorC *tkitp, BT_UInt32 index) |
Returns the Nth compound component. | |
const BT_Char16 * | BT_RLP_TokenIterator_GetDictionayForm (const BT_RLP_TokenIteratorC *tkitp) |
Returns the dictionary form (stem) of the token. | |
BT_UInt32 | BT_RLP_TokenIterator_GetEndOffset (const BT_RLP_TokenIteratorC *tkitp) |
Returns the end offset for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET). | |
BT_UInt32 | BT_RLP_TokenIterator_GetIndex (const BT_RLP_TokenIteratorC *tkitp) |
Returns the index of the token in the stream of tokens (BT_RLP_TOKEN). | |
const BT_Char16 * | BT_RLP_TokenIterator_GetLemmaForm (const BT_RLP_TokenIteratorC *tkitp) |
Returns the dictionary form (lemma) of the token. | |
const BT_Char16 * | BT_RLP_TokenIterator_GetManyToOneNormalizedForm (const BT_RLP_TokenIteratorC *tkitp) |
Returns the many-to-one normalized form of the token. | |
const BT_Char16 * | BT_RLP_TokenIterator_GetNormalForm (const BT_RLP_TokenIteratorC *tkitp) |
Returns the normalized form of the token. | |
BT_UInt32 | BT_RLP_TokenIterator_GetNumberOfAnalyses (const BT_RLP_TokenIteratorC *tkitp) |
Returns the number of alternative analyses for the token. | |
BT_UInt32 | BT_RLP_TokenIterator_GetNumberOfCompoundComponents (const BT_RLP_TokenIteratorC *tkitp) |
Returns the number of compound components for the token. (BT_RLP_COMPOUND). | |
BT_UInt32 | BT_RLP_TokenIterator_GetNumberOfReadings (const BT_RLP_TokenIteratorC *tkitp) |
Returns the number of readings for the token (BT_RLP_READING). | |
const char * | BT_RLP_TokenIterator_GetPartOfSpeech (const BT_RLP_TokenIteratorC *tkitp) |
Returns the part-of-speech tag for the token. | |
const BT_Char16 * | BT_RLP_TokenIterator_GetReading (const BT_RLP_TokenIteratorC *tkitp, BT_UInt32 index) |
Returns the Nth reading. | |
const BT_Char16 * | BT_RLP_TokenIterator_GetRootForm (const BT_RLP_TokenIteratorC *tkitp) |
Returns the dictionary form (root) of the token. | |
BT_UInt32 | BT_RLP_TokenIterator_GetSourceId (const BT_RLP_TokenIteratorC *tkitp) |
Returns the source name ID of the token. | |
const BT_Char16 * | BT_RLP_TokenIterator_GetSourceName (const BT_RLP_TokenIteratorC *tkitp, BT_UInt32 id) |
Returns the source name of the token. | |
BT_UInt32 | BT_RLP_TokenIterator_GetStartOffset (const BT_RLP_TokenIteratorC *tkitp) |
Returns the start offset for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET). | |
const BT_Char16 * | BT_RLP_TokenIterator_GetStemForm (const BT_RLP_TokenIteratorC *tkitp) |
Returns the dictionary form (stem) of the token. | |
const BT_Char16 * | BT_RLP_TokenIterator_GetToken (const BT_RLP_TokenIteratorC *tkitp) |
Return the token as a string. | |
bool | BT_RLP_TokenIterator_IsStopword (const BT_RLP_TokenIteratorC *tkitp) |
Returns whether the token is marked as a stopword (BT_RLP_STOPWORD). | |
bool | BT_RLP_TokenIterator_Next (BT_RLP_TokenIteratorC *tkitp) |
Advances to the next token. | |
bool | BT_RLP_TokenIterator_NextAnalysis (BT_RLP_TokenIteratorC *tkitp) |
Checks if there are more alternative analyses. | |
BT_UInt32 | BT_RLP_TokenIterator_Size (const BT_RLP_TokenIteratorC *tkitp) |
Return the total number of tokens. | |
BT_RLP_TokenIteratorFactoryC * | BT_RLP_TokenIteratorFactory_Create (void) |
Creates a token iterator factory. | |
BT_RLP_TokenIteratorC * | BT_RLP_TokenIteratorFactory_CreateIterator (BT_RLP_TokenIteratorFactoryC *tkitfacp, const BT_RLP_ContextC *contextp) |
Returns a new token iterator. | |
void | BT_RLP_TokenIteratorFactory_Destroy (BT_RLP_TokenIteratorFactoryC *tkitfacp) |
Destroys a token iterator factory. | |
bool | BT_RLP_TokenIteratorFactory_GetReturnCompoundComponents (const BT_RLP_TokenIteratorFactoryC *tkitfacp) |
Returns the setting for compound components. | |
bool | BT_RLP_TokenIteratorFactory_GetReturnReadings (const BT_RLP_TokenIteratorFactoryC *tkitfacp) |
Returns the setting for readings. | |
void | BT_RLP_TokenIteratorFactory_SetReturnCompoundComponents (BT_RLP_TokenIteratorFactoryC *tkitfacp, bool flag) |
Specifies whether iterators should return compound components. | |
void | BT_RLP_TokenIteratorFactory_SetReturnReadings (BT_RLP_TokenIteratorFactoryC *tkitfacp, bool flag) |
Specifies whether iterators should return readings. |
#define BT_RLP_CLIBRARY_INTERFACE_VERSION 1 |
Version number of the C API declared in this header file.
typedef struct BT_RLP_ContextC BT_RLP_ContextC |
This structure represents a specific runtime context for extracting linguistic data from a single document. Multiple context structures can be created from a single environment, each running independently of the other. Contexts may not be shared among threads.
Use BT_RLP_Environment_GetContextFromFile() or BT_RLP_Environment_GetContextFromBuffer() to obtain this structure. Make sure to call BT_RLP_Environment_DestroyContext() when you are done with this.
typedef struct BT_RLP_EnvironmentC BT_RLP_EnvironmentC |
This structure represents the RLP runtime environment and is responsible for maintaining input, output, and language processors.
Call BT_RLP_Environment_Create() to obtain the pointer to this structure; only one can exist per process at a time.
typedef struct BT_RLP_NE_IteratorC BT_RLP_NE_IteratorC |
This structure represents a single iterator for named entities.
Create these iterators by using BT_RLP_NE_IteratorFactory_CreateIterator(). Free the resources associated with the iterator by calling BT_RLP_NE_Iterator_Destroy().
typedef struct BT_RLP_NE_IteratorFactoryC BT_RLP_NE_IteratorFactoryC |
This structure represents a factory to create a structure of type BT_RLP_NE_IteratorC. To use this structure, create a factory and then call into it to set the parameters for the iterator(s) that you need.
Call BT_RLP_NE_IteratorFactory_CreateIterator() with a pointer to BT_RLP_ContextC
to get an iterator that reflects the current settings of the factory.
You must free the resources associated with the factory by calling BT_RLP_NE_IteratorFactory_Destroy().
typedef struct BT_RLP_Result_UTF16StringVectorC BT_RLP_Result_UTF16StringVectorC |
This structure represents a vector of UTF-16 strings. BT_RLP_Result_AsIntegerUTF16StringVectorPair() can be used to obtain this structure.
typedef struct BT_RLP_ResultC BT_RLP_ResultC |
This datatype encapsulates an individual result returned by a result iterator. Use functions whose names start with BT_RLP_Result_ to examine the result.
Because each result can contain different types of data, you must use the appropriate accessor for the result. The result data is described in the language processor's documentation.
typedef struct BT_RLP_ResultIteratorC BT_RLP_ResultIteratorC |
This structure is used to access the results of RLP processing. You get the iterator for a context's results through the BT_RLP_Context_GetResultIterator() function.
Use functions whose names start with BT_RLP_ResultIterator_ to access an individual result.
Note that Next and At actually return the same address when called on the same iterator; each call only changes what the BT_RLP_Result_ functions return for that address. If you do this:
BT_RLP_ResultIteratorC *rip = BT_RLP_Context_GetResultIterator(cxp, BT_RLP_TOKEN);
const BT_RLP_ResultC *resp0 = BT_RLP_ResultIterator_Next(rip); // First token
const BT_RLP_ResultC *resp1 = BT_RLP_ResultIterator_Next(rip); // Second token
Both resp0 and resp1 actually point to the same address, and BT_RLP_Result_AsUTF16String(resp0) would return the text of the second token, rather than the first token. The string returned by BT_RLP_Result_AsUTF16String(resp0) would need to be duplicated and stored separately before calling Next(), if the first token is needed later.
It is your responsibility to free the resource associated with the iterator when you are finished with it by calling BT_RLP_Context_DestroyResultIterator().
typedef struct BT_RLP_TokenIteratorC BT_RLP_TokenIteratorC |
This structure represents an iterator that iterates over tokens.
The structure can be obtained by calling BT_RLP_TokenIteratorFactory_CreateIterator(). Call BT_RLP_TokenIterator_Destroy() to free the resources.
typedef struct BT_RLP_TokenIteratorFactoryC BT_RLP_TokenIteratorFactoryC |
This structure represents a "factory" to create a token iterator represented by BT_RLP_TokenIteratorC.
To access results of processing, create a factory by calling BT_RLP_TokenIteratorFactory_Create(), and then call into it to set the parameters for the iterator(s) that you need by calling BT_RLP_TokenIteratorFactory_SetReturnCompoundComponents() or BT_RLP_TokenIteratorFactory_SetReturnReadings(). Call BT_RLP_TokenIteratorFactory_CreateIterator() with a BT_RLP_ContextC to get an iterator that reflects the current settings of the factory.
You must free the factory and associated resources by calling BT_RLP_TokenIteratorFactory_Destroy().
bool BT_RLP_CLibrary_VersionIsCompatible | ( | unsigned long | vers | ) |
This function checks if the header used is compatible with the library being linked. It also checks if the correct underlying RLP core library is being linked.
vers | You MUST give BT_RLP_CLIBRARY_INTERFACE_VERSION here. |
unsigned long BT_RLP_CLibrary_VersionNumber | ( | ) |
This function returns an integer representing the version of the linked C API (not C++) library. Use BT_RLP_Library_VersionNumber() to check the underlying C++ library.
const char* BT_RLP_CLibrary_VersionString | ( | ) |
This function returns a C string representing the version of the linked C API (not C++) library. Use BT_RLP_Library_VersionString() to check the underlying C++ library.
void BT_RLP_Context_DestroyResultIterator | ( | const BT_RLP_ContextC * | contextp, | |
BT_RLP_ResultIteratorC * | resitp | |||
) |
This function destroys an iterator created with the BT_RLP_Context_GetResultIterator().
contextp | Pointer to the RLP context. | |
resitp | Pointer to the iterator to be destroyed. |
void BT_RLP_Context_DestroyResultStorage | ( | BT_RLP_ContextC * | contextp | ) |
This function frees any storage allocated for results in the context. This operation is automatic when the context is destroyed or when a new Process call is made, but for some purposes the caller might wish to do this explicitly.
contextp | Pointer to the RLP context. |
BT_UInt32 BT_RLP_Context_GetIntegerResult | ( | const BT_RLP_ContextC * | contextp, | |
BT_RLP_ResultType | type | |||
) |
This function returns a 32-bit integer containing the value of the supplied result type.
contextp | Pointer to the RLP context. | |
type | The type of result to return. |
void BT_RLP_Context_GetProcessorReport | ( | const BT_RLP_ContextC * | contextp, | |
char * | buffer, | |||
BT_UInt32 | buffer_size | |||
) |
This function fills in a string with a report on the processors defined in the environment. The string is delimited with vertical bars:
proc0|desc0|proc1|desc1|...|procN|descN||
The string will be null-terminated on return. If the string was truncated, the trailing | will be missing.
contextp | Pointer to the RLP context. | |
buffer | Destination for the report. | |
buffer_size | The maximum length of the report buffer. |
bool BT_RLP_Context_GetPropertyValue | ( | const BT_RLP_ContextC * | contextp, | |
const char * | property_name, | |||
char * | output, | |||
unsigned | max_output | |||
) |
This function retrieves the value of a named property on the context. This API is provided so that your application can see what it has stored for a property value.
contextp | Pointer to the RLP context. | |
property_name | The name of the property to get. | |
output | The buffer to receive the value. | |
max_output | The size of the buffer. |
BT_RLP_ResultIteratorC* BT_RLP_Context_GetResultIterator | ( | const BT_RLP_ContextC * | contextp, | |
BT_RLP_ResultType | type | |||
) |
This function returns a result iterator for the supplied result type.
It is your responsibility to destroy the iterator when you are done with it using BT_RLP_Context_DestroyResultIterator().
contextp | Pointer to the RLP context. | |
type | The type of result to return an iterator for. |
const BT_Char8* BT_RLP_Context_GetStringResult | ( | const BT_RLP_ContextC * | contextp, | |
BT_RLP_ResultType | type | |||
) |
This function returns a pointer to a null-terminated ASCII character string containing the value of the supplied result type.
contextp | Pointer to the RLP context. | |
type | The type of result to return. |
const BT_UInt32* BT_RLP_Context_GetUnsignedIntegerArrayResults | ( | const BT_RLP_ContextC * | contextp, | |
BT_RLP_ResultType | type, | |||
BT_UInt32 * | countp | |||
) |
This function returns a pointer to result data for the supplied result type in the form of an array of 32-bit unsigned integers.
You may only call this function for those result types which are defined to support it. The results that return character data do not support it.
contextp | Pointer to the RLP context. | |
type | The type of result to return an iterator for. | |
countp | (output) Pointer to a 32-bit unsigned integer variable to which the number of items of the resulting array will be stored. |
const BT_Char16* BT_RLP_Context_GetUTF16StringResult | ( | const BT_RLP_ContextC * | contextp, | |
BT_RLP_ResultType | type, | |||
BT_UInt32 * | reslenp | |||
) |
This function returns a pointer to a UTF-16 string containing the value of the supplied result type. The string may not be terminated by 0.
contextp | Pointer to the RLP context. | |
type | The type of result to return. | |
reslenp | Pointer to an unsigned 32-bit integer variable to which the length of the result string will be stored. |
BT_Result BT_RLP_Context_ProcessBuffer | ( | BT_RLP_ContextC * | contextp, | |
const unsigned char * | inbuf, | |||
BT_UInt32 | inlen, | |||
BT_LanguageID | lid, | |||
const char * | character_encoding, | |||
const char * | mime_type | |||
) |
This function processes the text in a buffer within the context.
This function can be called repeatedly to process multiple buffers with a single context. Existing result iterators are invalid after ProcessBuffer is called and must not be used.
contextp | Pointer to the RLP context. | |
inbuf | Pointer to the buffer containing the text to process. The buffer can be discarded after this function is called. | |
inlen | Length (in bytes) of the buffer. | |
lid | Language id indicating the name of the language in the input. Specify BT_LANGUAGE_UNKNOWN, if you want RLI (see RLI in RLP Application Developer's Guide) to determine the language (RLI license required). | |
character_encoding | Character encoding for the data. Specify 0 if you want RLI to detect the encoding (RLI license required). For acceptable encodings, see RCLU Encodings in RLP Application Developer's Guide and Unicode Converter in RLP Application Developer's Guide. | |
mime_type | MIME-type of the data. Specify 0 if not known, or is not required by a subsequent processor. If it is not known and is required by a subsequent processor, mime_detector can detect the MIME-type. For a list of the MIME-types that RLP can handle, see mime_detector in RLP Application Developer's Guide. |
BT_OK | The buffer was processed correctly. |
BT_Result BT_RLP_Context_ProcessFile | ( | BT_RLP_ContextC * | contextp, | |
const char * | infile, | |||
BT_LanguageID | lid, | |||
const char * | character_encoding, | |||
const char * | mime_type | |||
) |
This function processes the text in a file within the context. The file is read in its entirety and then passed to the context's processors.
This function can be called repeatedly to process multiple files with a single context. Existing result iterators are invalid after ProcessFile is called and must not be used.
contextp | Pointer to the RLP context. | |
infile | The pathname of the input file. | |
lid | Language id indicating the name of the language in the input. Specify BT_LANGUAGE_UNKNOWN, if you want RLI (see RLI in RLP Application Developer's Guide) to determine the language (RLI license required). | |
character_encoding | Character encoding for the data. Specify 0 if you want RLI to detect the encoding (RLI license required). For acceptable encodings, see RCLU Encodings in RLP Application Developer's Guide and Unicode Converter in RLP Application Developer's Guide. | |
mime_type | MIME-type of the data. Specify 0 if not known, or is not required by a subsequent processor. If it is not known and is required by a subsequent processor, mime_detector can detect the MIME-type. For a list of the MIME-types that RLP can handle, see mime_detector in RLP Application Developer's Guide. |
BT_OK | The file was processed correctly. |
BT_Result BT_RLP_Context_ProcessUTF16Buffer | ( | BT_RLP_ContextC * | contextp, | |
const BT_Char16 * | inbuf, | |||
BT_UInt32 | inlen, | |||
BT_LanguageID | lid | |||
) |
This function processes a buffer of native byte-order UTF-16 text within the context.
contextp | Pointer to the RLP context. | |
inbuf | Pointer to the buffer containing the UTF-16 text to process. The buffer can be discarded after this function is called. | |
inlen | Length (in bytes) of the buffer. | |
lid | Language id indicating the name of the language in the input. Specify BT_LANGUAGE_UNKNOWN, if you want RLI (see RLI in RLP Application Developer's Guide) to determine the language (RLI license required). |
BT_OK | The buffer was processed correctly. |
void BT_RLP_Context_SetPropertyValue | ( | BT_RLP_ContextC * | contextp, | |
const char * | property_name, | |||
const char * | property_value | |||
) |
This function sets a named property on the context to the specified value. Specific processors document properties that they read to control their operations. In some cases, you may have to construct a string representation of a numeric value to set a property.
contextp | Pointer to the RLP context. | |
property_name | The name of the property to set (e.g. "com.basistech.rexml.output_pathname") | |
property_value | The value to set for the property. |
BT_RLP_EnvironmentC* BT_RLP_Environment_Create | ( | void | ) |
This function creates a new, uninitialized RLP environment, and returns a pointer to the structure representing it.
The pointer and the resources associated with it must be freed by calling BT_RLP_Environment_Destroy() when you are finished with it.
If desired, BT_RLP_Environment_SetXxxxxx() must be called prior to this call.
void BT_RLP_Environment_Destroy | ( | BT_RLP_EnvironmentC * | envp | ) |
This function destroys the RLP Environment and releases associated resources.
envp | Pointer to the RLP environment. |
void BT_RLP_Environment_DestroyContext | ( | BT_RLP_EnvironmentC * | envp, | |
BT_RLP_ContextC * | contextp | |||
) |
This function destroys a context created with the GetContextFromFile or GetContextFromBuffer functions. The context should not be used after it has been destroyed as the pointer will no longer be valid.
envp | Pointer to the RLP environment. | |
contextp | Pointer to the context to be destroyed. |
BT_Result BT_RLP_Environment_GetContextFromBuffer | ( | BT_RLP_EnvironmentC * | envp, | |
const unsigned char * | contextspec, | |||
BT_UInt32 | len, | |||
BT_RLP_ContextC ** | contextpp | |||
) |
This function creates a new context within the current environment as specified in the XML context description given in the C string.
The character data in the buffer should be encoded in UTF-8.
envp | Pointer to the RLP environment. | |
contextspec | C string containing the XML data. The string can be discarded after this function is called. | |
len | Length of the above string. | |
contextpp | Address of a variable of type (BT_RLP_ContextC *) to which a pointer to the newly created context will be stored. |
BT_OK | The context was allocated and initialized correctly. |
BT_Result BT_RLP_Environment_GetContextFromFile | ( | BT_RLP_EnvironmentC * | envp, | |
const char * | context_path, | |||
BT_RLP_ContextC ** | contextpp | |||
) |
This function creates a context with the current environment configuration, based on the named context definition.
The character data in the file should be encoded in UTF-8.
envp | Pointer to the RLP environment. | |
context_path | Pathname to the context configuration file. | |
contextpp | Address of the pointer for the created context. |
BT_OK | The context was allocated and initialized correctly. |
void BT_RLP_Environment_GetProcessorReport | ( | const BT_RLP_EnvironmentC * | envp, | |
char * | buffer, | |||
BT_UInt32 | buffer_size | |||
) |
This function fills in a string with a report on the processors defined in the environment. The string is delimited with vertical bars:
proc0|desc0|proc1|desc1|...|procN|descN||
The string will be null-terminated on return. If the string was truncated, the trailing | will be missing.
envp | Pointer to the RLP environment | |
buffer | C string buffer where the report is stored. | |
buffer_size | the maximum length of the report buffer. |
bool BT_RLP_Environment_HasLicenseForLanguage | ( | const BT_RLP_EnvironmentC * | envp, | |
BT_LanguageID | lid, | |||
BT_UInt32 | functionality | |||
) |
This function determines if a license exists for the language and functionality.
envp | Pointer to the RLP environment | |
lid | Language ID. | |
functionality | Functionality. |
true(1) | There is a license for the language and functionality. |
bool BT_RLP_Environment_HasLicenseForNamedFeature | ( | const BT_RLP_EnvironmentC * | envp, | |
const char * | feature, | |||
BT_UInt32 | functionality | |||
) |
This function determines if a license exists for the feature and functionality.
envp | Pointer to the RLP environment. | |
feature | Feature. | |
functionality | Functionality. |
true | There is a license for the feature and functionality. |
bool BT_RLP_Environment_HasLicenses | ( | const BT_RLP_EnvironmentC * | envp | ) |
This function determines if there are language and feature licenses in the environment.
envp | Pointer to the RLP environment |
true(1) | There are some language and feature licenses. |
BT_Result BT_RLP_Environment_InitializeFromBuffer | ( | BT_RLP_EnvironmentC * | envp, | |
const unsigned char * | buffer, | |||
BT_UInt32 | len | |||
) |
This function initializes the environment from an XML configuration stored in the buffer.
envp | Pointer to the RLP environment. | |
buffer | The buffer containing the XML data. The buffer can be discarded after this function is called. | |
len | The length of the buffer. |
BT_OK | The environment was initialized. |
BT_Result BT_RLP_Environment_InitializeFromFile | ( | BT_RLP_EnvironmentC * | envp, | |
const char * | pathname | |||
) |
This function initializes the environment from the XML configuration file specified by the argument.
envp | Pointer to the RLP environment. | |
pathname | The pathname to the configuration file. |
BT_OK | The environment was initialized. |
const char* BT_RLP_Environment_RootDirectory | ( | void | ) |
This static function returns the current location of the rlp subdirectory of the installation directory. It returns 0 in the event that no root directory is established. Generally, RLP does not function without a setting for the Basis root directory.
void BT_RLP_Environment_SetBTRootDirectory | ( | const char * | root_directory_pathname | ) |
This static function establishes the root directory of a Basis Technology installation. This directory is known as BT_ROOT or the Basis root directory. RLP-specific files are found under BT_ROOT/rlp.
root_directory_pathname | String specifying the root of the installation. |
void BT_RLP_Environment_SetLogCallbackFunction | ( | void * | info_p, | |
BT_Log_callback_function | fcn_p | |||
) |
This function specifies a callback function to receive logging data, which is encoded in UTF-8. This should be called before any other RLP function. All log channels will be muted by this call. A subsequent call to SetLogLevel should be used to control the log level.
static void x_log_callback(void* info_p, int channel, char const* message) { fprintf(stderr, "%d\t%s\n", channel, message); } BT_RLP_Environment_SetLogCallbackFunction(0, x_log_callback); BT_RLP_Environment_SetLogLevel("error");
info_p | Void pointer stored and passed to the callback function each time it is called. The interpretation of this value is solely at the discretion of the client. | |
fcn_p | Function pointer to the callback function, which returns void and takes a void * (the value of info_p), int (channel number), and a char const * (the message). |
void BT_RLP_Environment_SetLogLevel | ( | const char * | log_level_string | ) |
This static function controls the level of diagnostic logging for RLP. Call it before a BT_RLP_Environment is initialized. There are three logging channels that can be enabled: "error", "warning", and "info".
By default, without calling SetLogLevel, "error" level messages will be logged. To turn off all logging, call SetLogLevel("none").
These can be combined to log multiple levels by separating each one with a comma, e.g., "error,warning". The string "all" is synonymous with "error,warning,info".
The environment variable BT_RLP_LOG_LEVEL can be set similarly to get the same behavior without explicitly calling SetLogLevel.
BT_RLP_Environment_SetLogLevel("error,warning");
log_level_string | String specifying which of the standard channels should be logged. |
bool BT_RLP_Library_VersionIsCompatible | ( | unsigned long | vers | ) |
This function checks if the current version of the underlying C++ API library is compatible with the version of the library the object was compiled with.
vers | This must be BT_RLP_LIBRARY_INTERFACE_VERSION from bt_rlp_version.h. |
unsigned long BT_RLP_Library_VersionNumber | ( | void | ) |
This function returns the version number of the underlying C++ class library as an integer.
const char* BT_RLP_Library_VersionString | ( | void | ) |
This function returns the version number of the underlying C++ class library as a string.
void BT_RLP_NE_Iterator_Destroy | ( | BT_RLP_NE_IteratorC * | neitp | ) |
This function destroys the Named Entity Iterator.
neitp | Pointer to the Named Entity iterator. |
BT_UInt32 BT_RLP_NE_Iterator_GetEndCharacterOffset | ( | const BT_RLP_NE_IteratorC * | neitp | ) |
This function returns the character offset + 1 for the last character in the named entity.
neitp | Pointer to the Named Entity iterator. |
BT_UInt32 BT_RLP_NE_Iterator_GetEndTokenIndex | ( | const BT_RLP_NE_IteratorC * | neitp | ) |
This function returns the token index + 1 of the last token in the named entity.
neitp | Pointer to the Named Entity iterator. |
BT_UInt32 BT_RLP_NE_Iterator_GetIndex | ( | const BT_RLP_NE_IteratorC * | neitp | ) |
This function returns the index of the named entity in the vector of named entities.
neitp | Pointer to the Named Entity iterator. |
const BT_Char16* BT_RLP_NE_Iterator_GetNamedEntity | ( | const BT_RLP_NE_IteratorC * | neitp | ) |
This function returns normalized text of the current Named Entity.
neitp | Pointer to the Named Entity iterator. |
const BT_Char16* BT_RLP_NE_Iterator_GetRawNamedEntity | ( | const BT_RLP_NE_IteratorC * | neitp | ) |
This function returns the Named Entity as it occurred in the text.
neitp | Pointer to the Named Entity iterator. |
BT_UInt32 BT_RLP_NE_Iterator_GetStartCharacterOffset | ( | const BT_RLP_NE_IteratorC * | neitp | ) |
This function returns the character offset for the first character in the named entity.
neitp | Pointer to the Named Entity iterator. |
BT_UInt32 BT_RLP_NE_Iterator_GetStartTokenIndex | ( | const BT_RLP_NE_IteratorC * | neitp | ) |
This function returns the token index of the first token in the named entity.
neitp | Pointer to the Named Entity iterator. |
BT_UInt32 BT_RLP_NE_Iterator_GetType | ( | const BT_RLP_NE_IteratorC * | neitp | ) |
This function returns the Named Entity type.
neitp | Pointer to the Named Entity iterator. |
bool BT_RLP_NE_Iterator_Next | ( | BT_RLP_NE_IteratorC * | neitp | ) |
This function advances the iterator to the next named entity.
neitp | Pointer to the Named Entity iterator. |
BT_UInt32 BT_RLP_NE_Iterator_Size | ( | const BT_RLP_NE_IteratorC * | neitp | ) |
This function returns the total number of named entities.
neitp | Pointer to the Named Entity iterator. |
BT_RLP_NE_IteratorFactoryC* BT_RLP_NE_IteratorFactory_Create | ( | ) |
This function creates a Named Entity iterator factory. An NE iterator factory is created with the default properties.
BT_RLP_NE_IteratorC* BT_RLP_NE_IteratorFactory_CreateIterator | ( | BT_RLP_NE_IteratorFactoryC * | nifp, | |
BT_RLP_ContextC * | contextp | |||
) |
This function returns a new Named Entity token iterator. The new iterator will iterate over the tokens of the supplied context.
nifp | Pointer to the Named Entity iterator factory. | |
contextp | Pointer to the context to iterate over. |
void BT_RLP_NE_IteratorFactory_Destroy | ( | BT_RLP_NE_IteratorFactoryC * | nifp | ) |
This function destroys a Named Entity iterator factory.
nifp | Pointer to the Named Entity iterator factory. |
bool BT_RLP_NE_IteratorFactory_GetConsistentType | ( | const BT_RLP_NE_IteratorFactoryC * | nifp | ) |
This function gets the flag indicating whether the iterator should return a consistent named entity type for the same normalized named entities.
nifp | Pointer to the Named Entity iterator factory. |
bool BT_RLP_NE_IteratorFactory_GetStripAffixes | ( | const BT_RLP_NE_IteratorFactoryC * | nifp | ) |
This function gets the flag indicating whether the iterator should remove affixes from named entities when returning them.
nifp | Pointer to the Named Entity iterator factory. |
void BT_RLP_NE_IteratorFactory_SetConsistentType | ( | BT_RLP_NE_IteratorFactoryC * | nifp, | |
bool | flag | |||
) |
This function sets the flag indicating whether the iterator should return a consistent named entity type for the same normalized named entities.
nifp | Pointer to the Named Entity iterator factory. | |
flag | Set to "true" to enforce consistent named entity type. |
void BT_RLP_NE_IteratorFactory_SetStripAffixes | ( | BT_RLP_NE_IteratorFactoryC * | nifp, | |
bool | strip | |||
) |
This function sets the flag indicating whether the iterator should remove affixes from named entities when returning them.
nifp | Pointer to the Named Entity iterator factory. | |
strip | Specifies whether or not to strip the affixes. |
const BT_Char16* BT_RLP_Result_AsCountedUTF16String | ( | const BT_RLP_ResultC * | resp, | |
BT_UInt32 * | lenp | |||
) |
This function returns a pointer to a non-null-terminated UTF-16 encoded Unicode string and its length. Applies to BT_RLP_RAW_TEXT and BT_RLP_TRANSCRIBED_TEXT.
You should not modify the contents of the returned string.
resp | Pointer to the result. | |
lenp | Pointer to the variable to which the length will be stored. |
void BT_RLP_Result_AsIntegerPair | ( | const BT_RLP_ResultC * | resp, | |
BT_UInt32 * | ap, | |||
BT_UInt32 * | bp | |||
) |
This function returns a pair of 32-bit unsigned integer values. It is used for results that are returned as integral 2-tuples. Applies to BT_RLP_TOKEN_OFFSET and BT_RLP_BASE_NOUN_PHRASE.
resp | Pointer to the result. | |
ap | Pointer to the variable to hold the first value. | |
bp | Pointer to the variable to hold the second value. |
void BT_RLP_Result_AsIntegerTriple | ( | const BT_RLP_ResultC * | resp, | |
BT_UInt32 * | ap, | |||
BT_UInt32 * | bp, | |||
BT_UInt32 * | cp | |||
) |
This function returns a triple of 32-bit unsigned integer values. It is used for results that are returned as integral 3-tuples. Applies to BT_RLP_NAMED_ENTITY and BT_RLP_SCRIPT_REGION.
resp | Pointer to the result. | |
ap | Pointer to the variable to hold the first value. | |
bp | Pointer to the variable to hold the second value. | |
cp | Pointer to the variable to hold the third value. |
void BT_RLP_Result_AsIntegerUTF16StringVectorPair | ( | const BT_RLP_ResultC * | resp, | |
BT_UInt32 * | indexp, | |||
const BT_RLP_Result_UTF16StringVectorC ** | stringspp | |||
) |
This function returns a pair consisting of an index and a vector of UTF-16 encoded Unicode strings. Applies to BT_RLP_COMPOUND, BT_RLP_READING, BT_RLP_TOKEN_VARIATIONS, BT_RLP_ALTERNATIVE_LEMMAS, BT_RLP_ALTERNATIVE_NORM, BT_RLP_ALTERNATIVE_ROOTS, BT_RLP_ALTERNATIVE_STEMS.
You should not modify the contents of the returned strings.
resp | Pointer to the result. | |
indexp | Address of the variable to hold the returned index value. | |
stringspp | Pointer to a variable to which a pointer to the string vector will be stored. |
BT_Int32 BT_RLP_Result_AsSignedInteger | ( | const BT_RLP_ResultC * | resp | ) |
This function returns a signed 32-bit integer value. Reserved for future use.
resp | Pointer to the result. |
const BT_Char8* BT_RLP_Result_AsString | ( | const BT_RLP_ResultC * | resp | ) |
This function returns a pointer to a null-terminated 8-bit character string. No particular encoding is specified and is defined by the processor generating the result. Applies to BT_RLP_PART_OF_SPEECH, BT_RLP_DETECTED_ENCODING, and BT_RLP_MIME_TYPE.
You should not modify the contents of the returned string.
resp | Pointer to the result. |
BT_UInt32 BT_RLP_Result_AsUnsignedInteger | ( | const BT_RLP_ResultC * | resp | ) |
This function returns an unsigned 32-bit integer value. Applies to BT_RLP_DETECTED_LANGUAGE, BT_RLP_STOPWORD, BT_RLP_SENTENCE_BOUNDARY, BT_RLP_TEXT_BOUNDARIES, BT_RLP_MAP_OFFSETS, BT_RLP_TOKEN_SOURCE_ID.
resp | Pointer to the result. |
void BT_RLP_Result_AsUnsignedIntegerVector | ( | const BT_RLP_ResultC * | resp, | |
BT_UInt32 * | vector, | |||
BT_UInt32 | size | |||
) |
This function returns a vector of 32-bit unsigned integer values. It generalizes AsIntegerPair and AsIntegerTriple, and supports processors that create data values organized as larger vectors of integers. Applies to BT_RLP_LANGUAGE_REGION (6 integers).
resp | Pointer to the result. | |
vector | A pointer to a vector of integers to hold the values. | |
size | The number of slots in 'vector'. If this is smaller than the natural number of items for an iteration, the code fills the available slots and returns no indication of truncation. |
const BT_Char16* BT_RLP_Result_AsUTF16String | ( | const BT_RLP_ResultC * | resp | ) |
This function returns a pointer to a null-terminated UTF-16 encoded Unicode string. Applies to BT_RLP_TOKEN, BT_RLP_TOKEN_SOURCE_NAME, BT_RLP_STEM, BT_RLP_LEMMA, BT_RLP_MANY_TO_ONE_VARIANT_TOKEN, BT_RLP_NORMALIZED_TOKEN, BT_RLP_ROOTS, BT_RLP_GAZETTEER_NAMES.
You should not modify the contents of the returned string.
resp | Pointer to the result. |
const BT_Char16* BT_RLP_Result_UTF16StringVector_Get | ( | const BT_RLP_Result_UTF16StringVectorC * | u16vec, | |
BT_UInt32 | index | |||
) |
This function retrieves the indexed string from the vector.
u16vec | The vector | |
index | Index of the element of the vector to be retrieved. |
BT_UInt32 BT_RLP_Result_UTF16StringVector_Size | ( | const BT_RLP_Result_UTF16StringVectorC * | u16vec | ) |
This function examines the number of strings in the UTF-16 string vector.
u16vec | The vector. |
const BT_RLP_ResultC* BT_RLP_ResultIterator_At | ( | BT_RLP_ResultIteratorC * | resitp, | |
size_t | index | |||
) |
This function returns the result at index in the set. The pointer returned is valid until the next call to the BT_RLP_ResultIterator_Next() or BT_RLP_ResultIterator_At() function. You should neither deallocate nor store it permanently.
resitp | Pointer to the result iterator. | |
index | the location in the set of the result you wish to retrieve |
const BT_RLP_ResultC* BT_RLP_ResultIterator_Next | ( | BT_RLP_ResultIteratorC * | resitp | ) |
This function returns the next result in the set. The pointer returned is valid until the next call to the BT_RLP_ResultIterator_Next() or BT_RLP_ResultIterator_At() function. You should neither deallocate nor store it permanently.
resitp | Pointer to the result iterator. |
void BT_RLP_ResultIterator_Reset | ( | BT_RLP_ResultIteratorC * | resitp | ) |
This function resets the iterator to the beginning so that a subsequent call to BT_RLP_ResultIterator_Next() will return the first element in the result.
resitp | Pointer to the result iterator. |
BT_UInt32 BT_RLP_ResultIterator_Size | ( | const BT_RLP_ResultIteratorC * | resitp | ) |
This function returns the number of results available to the iterator.
resitp | Pointer to the result iterator. |
BT_RLP_ResultType BT_RLP_ResultIterator_Type | ( | const BT_RLP_ResultIteratorC * | resitp | ) |
This function returns the type of the result that is returned by this iterator.
resitp | Pointer to the result iterator. |
void BT_RLP_TokenIterator_Destroy | ( | BT_RLP_TokenIteratorC * | tkitp | ) |
This function destroys a token iterator.
tkitp | Pointer to the token iterator to be destroyed. |
const BT_Char16* BT_RLP_TokenIterator_GetCompoundComponent | ( | const BT_RLP_TokenIteratorC * | tkitp, | |
BT_UInt32 | index | |||
) |
This function returns the Nth compound component.
tkitp | Pointer to the token iterator. | |
index | Index of the component to be retrieved. |
const BT_Char16* BT_RLP_TokenIterator_GetDictionayForm | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the data from BT_RLP_STEM.
tkitp | Pointer to the token iterator. |
BT_UInt32 BT_RLP_TokenIterator_GetEndOffset | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the end offset for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET).
tkitp | Pointer to the token iterator. |
BT_UInt32 BT_RLP_TokenIterator_GetIndex | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the index of the token in the stream of tokens (BT_RLP_TOKEN).
tkitp | Pointer to the token iterator. |
const BT_Char16* BT_RLP_TokenIterator_GetLemmaForm | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the data from BT_RLP_LEMMA (for tokens) or BT_RLP_ALTERNATIVE_LEMMAS (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.
tkitp | Pointer to the token iterator. |
const BT_Char16* BT_RLP_TokenIterator_GetManyToOneNormalizedForm | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the data from BT_RLP_MANY_TO_ONE_NORMALIZED_TOKEN. May be an empty string; 0 if this result type is not returned for the language being processed.
tkitp | Pointer to the token iterator. |
const BT_Char16* BT_RLP_TokenIterator_GetNormalForm | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the data from BT_RLP_NORMALIZED_TOKEN (for tokens) or BT_RLP_ALTERNATIVE_NORM (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.
tkitp | Pointer to the token iterator. |
BT_UInt32 BT_RLP_TokenIterator_GetNumberOfAnalyses | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the number of alternative analyses for the token.
tkitp | Pointer to the token iterator. |
BT_UInt32 BT_RLP_TokenIterator_GetNumberOfCompoundComponents | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the number of compound components for the token. (BT_RLP_COMPOUND)
tkitp | Pointer to the token iterator. |
BT_UInt32 BT_RLP_TokenIterator_GetNumberOfReadings | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the number of readings for the token (BT_RLP_READING).
tkitp | Pointer to the token iterator. |
const char* BT_RLP_TokenIterator_GetPartOfSpeech | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the part-of-speech tag for the token (BT_RLP_PART_OF_SPEECH) or analysis (BT_RLP_ALTERNATIVE_PARTS_OF_SPEECH).
tkitp | Pointer to the token iterator. |
const BT_Char16* BT_RLP_TokenIterator_GetReading | ( | const BT_RLP_TokenIteratorC * | tkitp, | |
BT_UInt32 | index | |||
) |
This function retrieves the Nth reading. The Basic Linguistic module generates more than one possible reading for a token. Use this function to obtain each possible reading.
tkitp | Pointer to the token iterator. | |
index | Index of the reading to be retrieved. |
const BT_Char16* BT_RLP_TokenIterator_GetRootForm | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the data from BT_RLP_ROOTS (for tokens) or BT_RLP_ALTERNATIVE_ROOTS (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.
tkitp | Pointer to the token iterator. |
BT_UInt32 BT_RLP_TokenIterator_GetSourceId | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the data from BT_RLP_TOKEN_SOURCE_ID.
tkitp | Pointer to the token iterator. |
const BT_Char16* BT_RLP_TokenIterator_GetSourceName | ( | const BT_RLP_TokenIteratorC * | tkitp, | |
BT_UInt32 | id | |||
) |
This function returns the data from BT_RLP_TOKEN_SOURCE_NAME.
tkitp | Pointer to the token iterator. | |
id | specifies the ID of the source name |
BT_UInt32 BT_RLP_TokenIterator_GetStartOffset | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the start offset for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET).
tkitp | Pointer to the token iterator. |
const BT_Char16* BT_RLP_TokenIterator_GetStemForm | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the data from BT_RLP_STEM (for tokens) or BT_RLP_ALTERNATIVE_STEMS (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.
tkitp | Pointer to the token iterator. |
const BT_Char16* BT_RLP_TokenIterator_GetToken | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the data from BT_RLP_TOKEN.
tkitp | Pointer to the token iterator. |
bool BT_RLP_TokenIterator_IsStopword | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns whether the token is marked as a stopword (BT_RLP_STOPWORD).
tkitp | Pointer to the token iterator. |
bool BT_RLP_TokenIterator_Next | ( | BT_RLP_TokenIteratorC * | tkitp | ) |
This function advances the token iterator to the next token.
tkitp | Pointer to the token iterator. |
bool BT_RLP_TokenIterator_NextAnalysis | ( | BT_RLP_TokenIteratorC * | tkitp | ) |
This function checks if there are more alternative analyses. The Arabic Basic Linguistic module generates more than one possible analysis for a token. Use this function to obtain each possible analysis.
tkitp | Pointer to the token iterator. |
BT_UInt32 BT_RLP_TokenIterator_Size | ( | const BT_RLP_TokenIteratorC * | tkitp | ) |
This function returns the total number of tokens.
tkitp | Pointer to the token iterator. |
BT_RLP_TokenIteratorFactoryC* BT_RLP_TokenIteratorFactory_Create | ( | void | ) |
This function creates a token iterator factory with the default properties.
BT_RLP_TokenIteratorC* BT_RLP_TokenIteratorFactory_CreateIterator | ( | BT_RLP_TokenIteratorFactoryC * | tkitfacp, | |
const BT_RLP_ContextC * | contextp | |||
) |
This function creates a new token iterator and returns a pointer to the BT_RLP_TokenIteratorC structure that represents the token iterator.
tkitfacp | Pointer to the factory. | |
contextp | Pointer to the context to iterate over. |
void BT_RLP_TokenIteratorFactory_Destroy | ( | BT_RLP_TokenIteratorFactoryC * | tkitfacp | ) |
This function destroys a token iterator factory.
tkitfacp | Pointer to the factory to be destroyed. |
bool BT_RLP_TokenIteratorFactory_GetReturnCompoundComponents | ( | const BT_RLP_TokenIteratorFactoryC * | tkitfacp | ) |
This function returns the setting for compound components.
tkitfacp | Pointer to the factory. |
bool BT_RLP_TokenIteratorFactory_GetReturnReadings | ( | const BT_RLP_TokenIteratorFactoryC * | tkitfacp | ) |
This function returns the setting for readings.
tkitfacp | Pointer to the factory. |
void BT_RLP_TokenIteratorFactory_SetReturnCompoundComponents | ( | BT_RLP_TokenIteratorFactoryC * | tkitfacp, | |
bool | flag | |||
) |
This function specifies whether iterators from this factory will retrieve and return compound components.
tkitfacp | Pointer to the factory. | |
flag | Whether the iterators should read out and return compound components. |
void BT_RLP_TokenIteratorFactory_SetReturnReadings | ( | BT_RLP_TokenIteratorFactoryC * | tkitfacp, | |
bool | flag | |||
) |
This function specifies whether iterators from this factory will retrieve and return readings.
tkitfacp | Pointer to the factory. | |
flag | Whether the iterators should read out and return readings. |
Copyright © 2004-2008 Basis Technology Corporation. All Rights Reserved.