bt_rlp_c.h File Reference

Primary C public interface to the Rosette Linguistics Platform. More...

#include <bt_types.h>
#include <bt_language_names.h>
#include <bt_ne_types.h>
#include <bt_rlp_result_codes.h>
#include <bt_rlp_result_types.h>
#include <bt_rlp_license_types.h>
#include <bt_rlp_internal.h>
#include <bt_rlp_version.h>
#include <bt_log.h>

Include dependency graph for bt_rlp_c.h:


Defines

#define BT_RLP_CLIBRARY_INTERFACE_VERSION   1
 C API version number.
#define BT_RLP_NE_Iterator_GetEndOffset   BT_RLP_NE_Iterator_GetEndTokenIndex
 Old name for compatibility.
#define BT_RLP_NE_Iterator_GetStartOffset   BT_RLP_NE_Iterator_GetStartTokenIndex
 Old name for compatibility.

Typedefs

typedef struct BT_RLP_ContextC BT_RLP_ContextC
 Context for performing linguistic analysis.
typedef struct BT_RLP_EnvironmentC BT_RLP_EnvironmentC
 Runtime environment for the Rosette Linguistic Platform (RLP).
typedef struct BT_RLP_NE_IteratorC BT_RLP_NE_IteratorC
 Named Entity Iterator.
typedef struct
BT_RLP_NE_IteratorFactoryC 
BT_RLP_NE_IteratorFactoryC
 RLP Named Entity Iterator Factory.
typedef struct
BT_RLP_Result_UTF16StringVectorC 
BT_RLP_Result_UTF16StringVectorC
 A vector of UTF-16 strings returned as a result item.
typedef struct BT_RLP_ResultC BT_RLP_ResultC
 An individual result from the linguistic analysis.
typedef struct
BT_RLP_ResultIteratorC 
BT_RLP_ResultIteratorC
 RLP Result Iterator.
typedef struct
BT_RLP_TokenIteratorC 
BT_RLP_TokenIteratorC
 Token Iterator.
typedef struct
BT_RLP_TokenIteratorFactoryC 
BT_RLP_TokenIteratorFactoryC
 RLP Token Iterator Factory.

Functions

bool BT_RLP_CLibrary_VersionIsCompatible (unsigned long vers)
 Check RLP C Library compatibility.
unsigned long BT_RLP_CLibrary_VersionNumber ()
 Retrieves the C API version number as an integer.
const char * BT_RLP_CLibrary_VersionString ()
 C API version number as a string.
void BT_RLP_Context_DestroyResultIterator (const BT_RLP_ContextC *contextp, BT_RLP_ResultIteratorC *resitp)
 Destroys a result iterator created by GetResultIterator.
void BT_RLP_Context_DestroyResultStorage (BT_RLP_ContextC *contextp)
 Frees results storage in a context.
BT_UInt32 BT_RLP_Context_GetIntegerResult (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type)
 Gets a result stored as a 32-bit integer for a particular result type.
void BT_RLP_Context_GetProcessorReport (const BT_RLP_ContextC *contextp, char *buffer, BT_UInt32 buffer_size)
 Gets a report on the processors defined in an environment.
bool BT_RLP_Context_GetPropertyValue (const BT_RLP_ContextC *contextp, const char *property_name, char *output, unsigned max_output)
 Gets string property for communication with processors.
BT_RLP_ResultIteratorC * BT_RLP_Context_GetResultIterator (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type)
 Gets a result iterator for a particular result type.
const BT_Char8 * BT_RLP_Context_GetStringResult (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type)
 Gets a result stored as an ASCII string.
const BT_UInt32 * BT_RLP_Context_GetUnsignedIntegerArrayResults (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type, BT_UInt32 *countp)
 Gets a pointer to the data for a particular result type as a vector of unsigned integers.
const BT_Char16 * BT_RLP_Context_GetUTF16StringResult (const BT_RLP_ContextC *contextp, BT_RLP_ResultType type, BT_UInt32 *reslenp)
 Gets a result stored as a UTF-16 string.
BT_Result BT_RLP_Context_ProcessBuffer (BT_RLP_ContextC *contextp, const unsigned char *inbuf, BT_UInt32 inlen, BT_LanguageID lid, const char *character_encoding, const char *mime_type)
 Processes a text buffer within the context.
BT_Result BT_RLP_Context_ProcessFile (BT_RLP_ContextC *contextp, const char *infile, BT_LanguageID lid, const char *character_encoding, const char *mime_type)
 Processes a file within the context.
BT_Result BT_RLP_Context_ProcessUTF16Buffer (BT_RLP_ContextC *contextp, const BT_Char16 *inbuf, BT_UInt32 inlen, BT_LanguageID lid)
 Processes a buffer of text in UTF-16 encoding.
void BT_RLP_Context_SetPropertyValue (BT_RLP_ContextC *contextp, const char *property_name, const char *property_value)
 Sets a string property for communication with processors.
BT_RLP_EnvironmentC * BT_RLP_Environment_Create (void)
 Creates a new RLP environment.
void BT_RLP_Environment_Destroy (BT_RLP_EnvironmentC *envp)
 Deletes the RLP Environment.
void BT_RLP_Environment_DestroyContext (BT_RLP_EnvironmentC *envp, BT_RLP_ContextC *contextp)
 Destroys a context created with GetContextFromFile or GetContextFromBuffer.
BT_Result BT_RLP_Environment_GetContextFromBuffer (BT_RLP_EnvironmentC *envp, const unsigned char *contextspec, BT_UInt32 len, BT_RLP_ContextC **contextpp)
 Creates a context within the current environment, initialized from a buffer.
BT_Result BT_RLP_Environment_GetContextFromFile (BT_RLP_EnvironmentC *envp, const char *context_path, BT_RLP_ContextC **contextpp)
 Creates a context using the current environment, initialized from a file.
void BT_RLP_Environment_GetProcessorReport (const BT_RLP_EnvironmentC *envp, char *buffer, BT_UInt32 buffer_size)
 Get a report on the processors defined in an environment.
bool BT_RLP_Environment_HasLicenseForLanguage (const BT_RLP_EnvironmentC *envp, BT_LanguageID lid, BT_UInt32 functionality)
 Determines if a license exists for the language and functionality.
bool BT_RLP_Environment_HasLicenseForNamedFeature (const BT_RLP_EnvironmentC *envp, const char *feature, BT_UInt32 functionality)
 Determines if a license exists for the feature and functionality.
bool BT_RLP_Environment_HasLicenses (const BT_RLP_EnvironmentC *envp)
 Determines if there are language and feature licenses in the environment.
BT_Result BT_RLP_Environment_InitializeFromBuffer (BT_RLP_EnvironmentC *envp, const unsigned char *buffer, BT_UInt32 len)
 Initializes an environment from a buffer containing configuration information.
BT_Result BT_RLP_Environment_InitializeFromFile (BT_RLP_EnvironmentC *envp, const char *pathname)
 Initializes an environment from a configuration file.
const char * BT_RLP_Environment_RootDirectory (void)
 Queries the BT_ROOT/rlp directory.
void BT_RLP_Environment_SetBTRootDirectory (const char *root_directory_pathname)
 Sets the BT_ROOT directory, globally.
void BT_RLP_Environment_SetLogCallbackFunction (void *info_p, BT_Log_callback_function fcn_p)
 Sets a log callback function.
void BT_RLP_Environment_SetLogLevel (const char *log_level_string)
 Sets the logging control for RLP.
bool BT_RLP_Library_VersionIsCompatible (unsigned long vers)
 C++ library compatibility check.
unsigned long BT_RLP_Library_VersionNumber (void)
 C++ library version number.
const char * BT_RLP_Library_VersionString (void)
 C++ library version number as C string.
void BT_RLP_NE_Iterator_Destroy (BT_RLP_NE_IteratorC *neitp)
 Destroys the Named Entity Iterator.
BT_UInt32 BT_RLP_NE_Iterator_GetEndCharacterOffset (const BT_RLP_NE_IteratorC *neitp)
 Returns the character offset + 1 for the last character in the named entity.
BT_UInt32 BT_RLP_NE_Iterator_GetEndTokenIndex (const BT_RLP_NE_IteratorC *neitp)
 Returns the token index + 1 of the last token in the named entity.
BT_UInt32 BT_RLP_NE_Iterator_GetIndex (const BT_RLP_NE_IteratorC *neitp)
 Returns the index of the named entity in the vector of named entities.
const BT_Char16 * BT_RLP_NE_Iterator_GetNamedEntity (const BT_RLP_NE_IteratorC *neitp)
 Returns normalized text of the current Named Entity.
const BT_Char16 * BT_RLP_NE_Iterator_GetRawNamedEntity (const BT_RLP_NE_IteratorC *neitp)
 Retrieves NE as it occurred in the text.
BT_UInt32 BT_RLP_NE_Iterator_GetStartCharacterOffset (const BT_RLP_NE_IteratorC *neitp)
 Returns the character offset for the first character in the named entity.
BT_UInt32 BT_RLP_NE_Iterator_GetStartTokenIndex (const BT_RLP_NE_IteratorC *neitp)
 Returns the token index of the first token in the named entity.
BT_UInt32 BT_RLP_NE_Iterator_GetType (const BT_RLP_NE_IteratorC *neitp)
 Returns NE type.
bool BT_RLP_NE_Iterator_Next (BT_RLP_NE_IteratorC *neitp)
 Advances to the next named entity.
BT_UInt32 BT_RLP_NE_Iterator_Size (const BT_RLP_NE_IteratorC *neitp)
 Returns the total number of named entities.
BT_RLP_NE_IteratorFactoryC * BT_RLP_NE_IteratorFactory_Create ()
 Create a NE iterator factory.
BT_RLP_NE_IteratorC * BT_RLP_NE_IteratorFactory_CreateIterator (BT_RLP_NE_IteratorFactoryC *nifp, BT_RLP_ContextC *contextp)
 Returns a new Named Entity iterator.
void BT_RLP_NE_IteratorFactory_Destroy (BT_RLP_NE_IteratorFactoryC *nifp)
 Destroys a NE iterator factory.
bool BT_RLP_NE_IteratorFactory_GetConsistentType (const BT_RLP_NE_IteratorFactoryC *nifp)
 Gets the flag indicating whether named entity type should be consistent.
bool BT_RLP_NE_IteratorFactory_GetStripAffixes (const BT_RLP_NE_IteratorFactoryC *nifp)
 Reports the current affix handling.
void BT_RLP_NE_IteratorFactory_SetConsistentType (BT_RLP_NE_IteratorFactoryC *nifp, bool flag)
 Sets the flag indicating whether the iterator should return consistent named entity type.
void BT_RLP_NE_IteratorFactory_SetStripAffixes (BT_RLP_NE_IteratorFactoryC *nifp, bool strip)
 Sets affix handling.
const BT_Char16 * BT_RLP_Result_AsCountedUTF16String (const BT_RLP_ResultC *resp, BT_UInt32 *lenp)
 Returns a counted UTF-16 string.
void BT_RLP_Result_AsIntegerPair (const BT_RLP_ResultC *resp, BT_UInt32 *ap, BT_UInt32 *bp)
 Returns a pair of unsigned 32-bit integers.
void BT_RLP_Result_AsIntegerTriple (const BT_RLP_ResultC *resp, BT_UInt32 *ap, BT_UInt32 *bp, BT_UInt32 *cp)
 Returns a triple of unsigned 32-bit integers.
void BT_RLP_Result_AsIntegerUTF16StringVectorPair (const BT_RLP_ResultC *resp, BT_UInt32 *indexp, const BT_RLP_Result_UTF16StringVectorC **stringspp)
 Returns a pair consisting of an integer (token) index and a vector of UTF-16 Strings.
BT_Int32 BT_RLP_Result_AsSignedInteger (const BT_RLP_ResultC *resp)
 Returns a signed 32-bit integer.
const BT_Char8 * BT_RLP_Result_AsString (const BT_RLP_ResultC *resp)
 Returns a null-terminated 8-bit string.
BT_UInt32 BT_RLP_Result_AsUnsignedInteger (const BT_RLP_ResultC *resp)
 Returns an unsigned 32-bit integer.
void BT_RLP_Result_AsUnsignedIntegerVector (const BT_RLP_ResultC *resp, BT_UInt32 *vector, BT_UInt32 size)
 Returns a vector of unsigned 32-bit integers.
const BT_Char16 * BT_RLP_Result_AsUTF16String (const BT_RLP_ResultC *resp)
 Returns a null-terminated UTF-16 encoded Unicode string.
const BT_Char16 * BT_RLP_Result_UTF16StringVector_Get (const BT_RLP_Result_UTF16StringVectorC *u16vec, BT_UInt32 index)
 Retrieves a UTF-16 string.
BT_UInt32 BT_RLP_Result_UTF16StringVector_Size (const BT_RLP_Result_UTF16StringVectorC *u16vec)
 Examines the size of the UTF-16 string vector.
const BT_RLP_ResultC * BT_RLP_ResultIterator_At (BT_RLP_ResultIteratorC *resitp, size_t index)
 Returns the result at index from the set.
const BT_RLP_ResultC * BT_RLP_ResultIterator_Next (BT_RLP_ResultIteratorC *resitp)
 Returns the next result in the set.
void BT_RLP_ResultIterator_Reset (BT_RLP_ResultIteratorC *resitp)
 Resets the iterator to the beginning.
BT_UInt32 BT_RLP_ResultIterator_Size (const BT_RLP_ResultIteratorC *resitp)
 Returns the number of results available.
BT_RLP_ResultType BT_RLP_ResultIterator_Type (const BT_RLP_ResultIteratorC *resitp)
 Returns the type of results returned by the iterator.
void BT_RLP_TokenIterator_Destroy (BT_RLP_TokenIteratorC *tkitp)
 Destroys a token iterator.
const BT_Char16 * BT_RLP_TokenIterator_GetCompoundComponent (const BT_RLP_TokenIteratorC *tkitp, BT_UInt32 index)
 Returns the Nth compound component.
const BT_Char16 * BT_RLP_TokenIterator_GetDictionayForm (const BT_RLP_TokenIteratorC *tkitp)
 Returns the dictionary form (stem) of the token.
BT_UInt32 BT_RLP_TokenIterator_GetEndOffset (const BT_RLP_TokenIteratorC *tkitp)
 Returns the end offset for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET).
BT_UInt32 BT_RLP_TokenIterator_GetIndex (const BT_RLP_TokenIteratorC *tkitp)
 Returns the index of the token in the stream of tokens (BT_RLP_TOKEN).
const BT_Char16 * BT_RLP_TokenIterator_GetLemmaForm (const BT_RLP_TokenIteratorC *tkitp)
 Returns the dictionary form (lemma) of the token.
const BT_Char16 * BT_RLP_TokenIterator_GetManyToOneNormalizedForm (const BT_RLP_TokenIteratorC *tkitp)
 Returns the many-to-one normalized form of the token.
const BT_Char16 * BT_RLP_TokenIterator_GetNormalForm (const BT_RLP_TokenIteratorC *tkitp)
 Returns the normalized form of the token.
BT_UInt32 BT_RLP_TokenIterator_GetNumberOfAnalyses (const BT_RLP_TokenIteratorC *tkitp)
 Returns the number of alternative analyses for the token.
BT_UInt32 BT_RLP_TokenIterator_GetNumberOfCompoundComponents (const BT_RLP_TokenIteratorC *tkitp)
 Returns the number of compound components for the token. (BT_RLP_COMPOUND).
BT_UInt32 BT_RLP_TokenIterator_GetNumberOfReadings (const BT_RLP_TokenIteratorC *tkitp)
 Returns the number of readings for the token (BT_RLP_READING).
const char * BT_RLP_TokenIterator_GetPartOfSpeech (const BT_RLP_TokenIteratorC *tkitp)
 Returns the part-of-speech tag for the token.
const BT_Char16 * BT_RLP_TokenIterator_GetReading (const BT_RLP_TokenIteratorC *tkitp, BT_UInt32 index)
 Returns the Nth reading.
const BT_Char16 * BT_RLP_TokenIterator_GetRootForm (const BT_RLP_TokenIteratorC *tkitp)
 Returns the dictionary form (root) of the token.
BT_UInt32 BT_RLP_TokenIterator_GetSourceId (const BT_RLP_TokenIteratorC *tkitp)
 Returns the source name ID of the token.
const BT_Char16 * BT_RLP_TokenIterator_GetSourceName (const BT_RLP_TokenIteratorC *tkitp, BT_UInt32 id)
 Returns the source name of the token.
BT_UInt32 BT_RLP_TokenIterator_GetStartOffset (const BT_RLP_TokenIteratorC *tkitp)
 Returns the start offset for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET).
const BT_Char16 * BT_RLP_TokenIterator_GetStemForm (const BT_RLP_TokenIteratorC *tkitp)
 Returns the dictionary form (stem) of the token.
const BT_Char16 * BT_RLP_TokenIterator_GetToken (const BT_RLP_TokenIteratorC *tkitp)
 Return the token as a string.
bool BT_RLP_TokenIterator_IsStopword (const BT_RLP_TokenIteratorC *tkitp)
 Returns whether the token is marked as a stopword (BT_RLP_STOPWORD).
bool BT_RLP_TokenIterator_Next (BT_RLP_TokenIteratorC *tkitp)
 Advances to the next token.
bool BT_RLP_TokenIterator_NextAnalysis (BT_RLP_TokenIteratorC *tkitp)
 Checks if there are more alternative analyses.
BT_UInt32 BT_RLP_TokenIterator_Size (const BT_RLP_TokenIteratorC *tkitp)
 Return the total number of tokens.
BT_RLP_TokenIteratorFactoryC * BT_RLP_TokenIteratorFactory_Create (void)
 Creates a token iterator factory.
BT_RLP_TokenIteratorC * BT_RLP_TokenIteratorFactory_CreateIterator (BT_RLP_TokenIteratorFactoryC *tkitfacp, const BT_RLP_ContextC *contextp)
 Returns a new token iterator.
void BT_RLP_TokenIteratorFactory_Destroy (BT_RLP_TokenIteratorFactoryC *tkitfacp)
 Destroys a token iterator factory.
bool BT_RLP_TokenIteratorFactory_GetReturnCompoundComponents (const BT_RLP_TokenIteratorFactoryC *tkitfacp)
 Returns the setting for compound components.
bool BT_RLP_TokenIteratorFactory_GetReturnReadings (const BT_RLP_TokenIteratorFactoryC *tkitfacp)
 Returns the setting for readings.
void BT_RLP_TokenIteratorFactory_SetReturnCompoundComponents (BT_RLP_TokenIteratorFactoryC *tkitfacp, bool flag)
 Specifies whether iterators should return compound components.
void BT_RLP_TokenIteratorFactory_SetReturnReadings (BT_RLP_TokenIteratorFactoryC *tkitfacp, bool flag)
 Specifies whether iterators should return readings.

Detailed Description

This file declares the public interface for the C programs to the Rosette Linguistics Platform. This is the only header file that needs to be included for normal RLP usage, as it includes all of the other interfaces comprising the RLP public API.

Define Documentation

#define BT_RLP_CLIBRARY_INTERFACE_VERSION   1

Version number of the C API declared by this header file.


Typedef Documentation

typedef struct BT_RLP_ContextC BT_RLP_ContextC

This structure represents a specific runtime context for extracting linguistic data from a single document. Multiple context structures can be created from a single environment, each running independently of the other. Contexts may not be shared among threads.

Use BT_RLP_Environment_GetContextFromFile() or BT_RLP_Environment_GetContextFromBuffer() to obtain this structure. Make sure to call BT_RLP_Environment_DestroyContext() when you are done with this.

See also:
BT_RLP_EnvironmentC

typedef struct BT_RLP_EnvironmentC BT_RLP_EnvironmentC

This structure represents the RLP runtime environment and is responsible for maintaining input, output, and language processors.

Call BT_RLP_Environment_Create() to obtain the pointer to this structure; only one can exist per process at a time.

See also:
BT_RLP_Environment_Create(), BT_RLP_Environment_InitializeFromFile(), BT_RLP_Environment_InitializeFromBuffer()

typedef struct BT_RLP_NE_IteratorC BT_RLP_NE_IteratorC

This structure represents a single iterator for named entities.

Create these iterators by using BT_RLP_NE_IteratorFactory_CreateIterator(). Free the resources associated with the iterator by calling BT_RLP_NE_Iterator_Destroy().

Note:
This class will return null-terminated strings from its internal storage. You must copy the data before calling any other function that returns a string.
See also:
BT_RLP_NE_Iterator_Next(), BT_RLP_NE_Iterator_Size(), BT_RLP_NE_Iterator_GetRawNamedEntity(), BT_RLP_NE_Iterator_GetNamedEntity(),

typedef struct BT_RLP_NE_IteratorFactoryC BT_RLP_NE_IteratorFactoryC

This structure represents a factory to create a structure of type BT_RLP_NE_IteratorC. To use this structure, create a factory and then call into it to set the parameters for the iterator(s) that you need.

Call BT_RLP_NE_IteratorFactory_CreateIterator() with a pointer to BT_RLP_ContextC to get an iterator that reflects the current settings of the factory.

You must free the resources associated with the factory by calling BT_RLP_NE_IteratorFactory_Destroy().

See also:
BT_RLP_NE_IteratorFactory_SetStripAffixes(), BT_RLP_NE_IteratorFactory_GetStripAffixes()

typedef struct BT_RLP_Result_UTF16StringVectorC BT_RLP_Result_UTF16StringVectorC

This structure represents a vector of UTF-16 strings. BT_RLP_Result_AsIntegerUTF16StringVectorPair() can be used to obtain this structure.

See also:
BT_RLP_Result_UTF16StringVector_Size(), BT_RLP_Result_UTF16StringVector_Get()

typedef struct BT_RLP_ResultC BT_RLP_ResultC

This datatype encapsulates an individual result returned by a result iterator. Use functions whose names start with BT_RLP_Result_ to examine the result.

Because each result can contain different types of data, you must use the appropriate accessor for the result. The result data is described in the language processor's documentation.

See also:
BT_RLP_Result_AsCountedUTF16String(), BT_RLP_Result_AsIntegerUTF16StringVectorPair(), BT_RLP_Result_AsString(), BT_RLP_Result_AsUnsignedInteger(), BT_RLP_Result_AsSignedInteger(), BT_RLP_Result_AsIntegerPair(), BT_RLP_Result_AsIntegerTriple(), BT_RLP_Result_AsUnsignedIntegerVector()

typedef struct BT_RLP_ResultIteratorC BT_RLP_ResultIteratorC

This structure is used to access the results of RLP processing. You get the iterator for a context's results through the BT_RLP_Context_GetResultIterator() function.

Use functions whose names start with BT_RLP_ResultIterator_ to access an individual token.

Note that Next and At actually return the same address when called for the same iterator; each call changes what the BT_RLP_Result_ functions return. If you do this:

      BT_RLP_ResultIteratorC *rip 
            = BT_RLP_Context_GetResultIterator(cxp, BT_RLP_TOKEN);
      const BT_RLP_ResultC *resp0 = BT_RLP_ResultIterator_Next(rip); // First token
      const BT_RLP_ResultC *resp1 = BT_RLP_ResultIterator_Next(rip); // Second token
      
Both resp0 and resp1 actually point to the same address, and BT_RLP_Result_AsUTF16String(resp0) would return the text of the second token, rather than the first token. The string returned by BT_RLP_Result_AsUTF16String(resp0) would need to be duplicated and stored separately before calling Next(), if the first token is needed later.

It is your responsibility to free the resource associated with the iterator when you are finished with it by calling BT_RLP_Context_DestroyResultIterator().

See also:
BT_RLP_Context_GetResultIterator(), BT_RLP_Context_DestroyResultIterator(), BT_RLP_ResultIterator_Size(), BT_RLP_ResultIterator_Type(), BT_RLP_ResultIterator_Next(), BT_RLP_ResultIterator_At(), BT_RLP_ResultIterator_Reset()

typedef struct BT_RLP_TokenIteratorC BT_RLP_TokenIteratorC

This structure represents an iterator that iterates over tokens.

The structure can be obtained by calling BT_RLP_TokenIteratorFactory_CreateIterator(). Call BT_RLP_TokenIterator_Destroy() to free the resources.

typedef struct BT_RLP_TokenIteratorFactoryC BT_RLP_TokenIteratorFactoryC

This structure represents a "factory" to create a token iterator represented by BT_RLP_TokenIteratorC.

To access results of processing, create a factory by calling BT_RLP_TokenIteratorFactory_Create(), and then call into it to set the parameters for the iterator(s) that you need by calling BT_RLP_TokenIteratorFactory_SetReturnCompoundComponents() or BT_RLP_TokenIteratorFactory_SetReturnReadings(). Call BT_RLP_TokenIteratorFactory_CreateIterator() with a BT_RLP_ContextC to get an iterator that reflects the current settings of the factory.

You must free the factory and associated resources by calling BT_RLP_TokenIteratorFactory_Destroy().

See also:
BT_RLP_TokenIteratorFactory_Create(), BT_RLP_TokenIteratorFactory_Destroy(), BT_RLP_TokenIteratorFactory_CreateIterator()


Function Documentation

bool BT_RLP_CLibrary_VersionIsCompatible ( unsigned long  vers  ) 

This function checks if the header used is compatible with the library being linked. It also checks if the correct underlying RLP core library is being linked.

Parameters:
vers You MUST give BT_RLP_CLIBRARY_INTERFACE_VERSION here.
Returns:
true only if the header used to compile this C source file is compatible with the C binding library being used, AND the expected underlying RLP core library is being used.

unsigned long BT_RLP_CLibrary_VersionNumber (  ) 

This function returns an integer representing the version of the linked C API (not C++) library. Use BT_RLP_Library_VersionNumber() to check the underlying C++ library.

Returns:
The version number.

const char* BT_RLP_CLibrary_VersionString (  ) 

This function returns a C string that represents the version of the linked C API (not C++) library. Use BT_RLP_Library_VersionString() to check the underlying C++ library.

Returns:
The version string.

void BT_RLP_Context_DestroyResultIterator ( const BT_RLP_ContextC *  contextp,
BT_RLP_ResultIteratorC *  resitp 
)

This function destroys an iterator created with the BT_RLP_Context_GetResultIterator().

Parameters:
contextp Pointer to the RLP context.
resitp Pointer to the iterator to be destroyed.

void BT_RLP_Context_DestroyResultStorage ( BT_RLP_ContextC *  contextp  ) 

This function frees any storage allocated for results in the context. This operation is automatic when the context is destroyed or when a new Process call is made, but for some purposes the caller might wish to do this explicitly.

Parameters:
contextp Pointer to the RLP context.

BT_UInt32 BT_RLP_Context_GetIntegerResult ( const BT_RLP_ContextC *  contextp,
BT_RLP_ResultType  type 
)

This function returns a 32-bit integer containing the value of the supplied result type.

Parameters:
contextp Pointer to the RLP context.
type The type of result to return.
Returns:
An integer

void BT_RLP_Context_GetProcessorReport ( const BT_RLP_ContextC *  contextp,
char *  buffer,
BT_UInt32  buffer_size 
)

This function fills in a string with a report on the processors defined in the environment. The string is delimited with vertical bars:

      proc0|desc0|proc1|desc1|...|procN|descN||

The string will be null-terminated on return. If the string was truncated, the trailing | will be missing.

Parameters:
contextp Pointer to the RLP context.
buffer Destination for the report.
buffer_size The maximum length of the report buffer.

bool BT_RLP_Context_GetPropertyValue ( const BT_RLP_ContextC *  contextp,
const char *  property_name,
char *  output,
unsigned  max_output 
)

This function retrieves the value of a named property on the context. This API is provided so that your application can see what it has stored for a property value.

Parameters:
contextp Pointer to the RLP context.
property_name The name of the property to get.
output The buffer to receive the value.
max_output The size of the buffer.
Returns:
true if the property was defined, false otherwise.

BT_RLP_ResultIteratorC* BT_RLP_Context_GetResultIterator ( const BT_RLP_ContextC *  contextp,
BT_RLP_ResultType  type 
)

This function returns a result iterator for the supplied result type.

It is your responsibility to destroy the iterator when you are done with it using BT_RLP_Context_DestroyResultIterator().

Parameters:
contextp Pointer to the RLP context.
type The type of result to return an iterator for.
Returns:
The iterator, or 0 if it cannot be allocated.

const BT_Char8* BT_RLP_Context_GetStringResult ( const BT_RLP_ContextC *  contextp,
BT_RLP_ResultType  type 
)

This function returns a pointer to a null-terminated ASCII character string containing the value of the supplied result type.

Parameters:
contextp Pointer to the RLP context.
type The type of result to return.
Returns:
Pointer to a null-terminated ASCII string.

const BT_UInt32* BT_RLP_Context_GetUnsignedIntegerArrayResults ( const BT_RLP_ContextC *  contextp,
BT_RLP_ResultType  type,
BT_UInt32 *  countp 
)

This function returns a pointer to result data for the supplied result type in the form of an array of 32-bit unsigned integers.

You may only call this function for those result types which are defined to support it. The results that return character data do not support it.

Parameters:
contextp Pointer to the RLP context.
type The type of result to return an iterator for.
countp (output) Pointer to a 32-bit unsigned integer variable to which the number of items of the resulting array will be stored.
Returns:
The pointer to the result array.

const BT_Char16* BT_RLP_Context_GetUTF16StringResult ( const BT_RLP_ContextC *  contextp,
BT_RLP_ResultType  type,
BT_UInt32 *  reslenp 
)

This function returns a pointer to a UTF-16 string containing the value of the supplied result type. The string may not be terminated by 0.

Parameters:
contextp Pointer to the RLP context.
type The type of result to return.
reslenp Pointer to an unsigned 32-bit integer variable to which the length of the result string will be stored.
Returns:
Pointer to a UTF-16 string.

BT_Result BT_RLP_Context_ProcessBuffer ( BT_RLP_ContextC *  contextp,
const unsigned char *  inbuf,
BT_UInt32  inlen,
BT_LanguageID  lid,
const char *  character_encoding,
const char *  mime_type 
)

This function processes the text in a buffer within the context.

This function can be called repeatedly to process multiple buffers with a single context. Existing result iterators are invalid after ProcessBuffer is called and must not be used.

Parameters:
contextp Pointer to the RLP context.
inbuf Pointer to the buffer containing the text to be processed. The buffer can be discarded after this function is called.
inlen Length (in bytes) of the buffer.
lid Language id indicating the name of the language in the input. Specify BT_LANGUAGE_UNKNOWN, if you want RLI (see RLI in RLP Application Developer's Guide) to determine the language (RLI license required).
character_encoding Character encoding for the data. Specify 0 if you want RLI to detect the encoding (RLI license required). For acceptable encodings, see RCLU Encodings in RLP Application Developer's Guide and Unicode Converter in RLP Application Developer's Guide.
mime_type MIME-type of the data. Specify 0 if not known, or is not required by a subsequent processor. If it is not known and is required by a subsequent processor, mime_detector can detect the MIME-type. For a list of the MIME-types that RLP can handle, see mime_detector in RLP Application Developer's Guide.
Return values:
BT_OK The buffer was processed correctly.

BT_Result BT_RLP_Context_ProcessFile ( BT_RLP_ContextC *  contextp,
const char *  infile,
BT_LanguageID  lid,
const char *  character_encoding,
const char *  mime_type 
)

This function processes the text in a file within the context. The file is read in its entirety and then passed to the context's processors.

This function can be called repeatedly to process multiple files with a single context. Existing result iterators are invalid after ProcessFile is called and must not be used.

Parameters:
contextp Pointer to the RLP context.
infile The pathname of the input file.
lid Language id indicating the name of the language in the input. Specify BT_LANGUAGE_UNKNOWN, if you want RLI (see RLI in RLP Application Developer's Guide) to determine the language (RLI license required).
character_encoding Character encoding for the data. Specify 0 if you want RLI to detect the encoding (RLI license required). For acceptable encodings, see RCLU Encodings in RLP Application Developer's Guide and Unicode Converter in RLP Application Developer's Guide.
mime_type MIME-type of the data. Specify 0 if not known, or is not required by a subsequent processor. If it is not known and is required by a subsequent processor, mime_detector can detect the MIME-type. For a list of the MIME-types that RLP can handle, see mime_detector in RLP Application Developer's Guide.
Return values:
BT_OK The file was processed correctly.

BT_Result BT_RLP_Context_ProcessUTF16Buffer ( BT_RLP_ContextC *  contextp,
const BT_Char16 *  inbuf,
BT_UInt32  inlen,
BT_LanguageID  lid 
)

This function processes a buffer of native byte-order UTF-16 text within the context.

Parameters:
contextp Pointer to the RLP context.
inbuf Pointer to the buffer containing the UTF-16 text to be processed. The buffer can be discarded after this function is called.
inlen Length (in bytes) of the buffer.
lid Language id indicating the name of the language in the input. Specify BT_LANGUAGE_UNKNOWN, if you want RLI (see RLI in RLP Application Developer's Guide) to determine the language (RLI license required).
Return values:
BT_OK The buffer was processed correctly.

void BT_RLP_Context_SetPropertyValue ( BT_RLP_ContextC *  contextp,
const char *  property_name,
const char *  property_value 
)

This function sets a named property on the context to the specified value. Specific processors document properties that they read to control their operations. In some cases, you may have to construct a string representation of a numeric value to set a property.

Parameters:
contextp Pointer to the RLP context.
property_name The name of the property to set (e.g. "com.basistech.rexml.output_pathname")
property_value The value to set for the property.

BT_RLP_EnvironmentC* BT_RLP_Environment_Create ( void   ) 

This function creates a new, uninitialized RLP environment, and returns a pointer to the structure representing it.

The pointer and the resources associated with it must be freed by calling BT_RLP_Environment_Destroy() when you are finished with it.

If desired, BT_RLP_Environment_SetXxxxxx() must be called prior to this call.

Returns:
Pointer to the structure representing the newly created environment, or 0 if failed.

void BT_RLP_Environment_Destroy ( BT_RLP_EnvironmentC *  envp  ) 

This function destroys the RLP Environment and releases associated resources.

Parameters:
envp Pointer to the RLP environment.

void BT_RLP_Environment_DestroyContext ( BT_RLP_EnvironmentC *  envp,
BT_RLP_ContextC *  contextp 
)

This function destroys a context created with the GetContextFromFile or GetContextFromBuffer functions. The context should not be used after it has been destroyed as the pointer will no longer be valid.

Parameters:
envp Pointer to the RLP environment.
contextp Pointer to the context to be destroyed.

BT_Result BT_RLP_Environment_GetContextFromBuffer ( BT_RLP_EnvironmentC *  envp,
const unsigned char *  contextspec,
BT_UInt32  len,
BT_RLP_ContextC **  contextpp 
)

This function creates a new context within the current environment as specified in the XML context description given in the C string.

The character data in the buffer should be encoded in UTF-8.

Parameters:
envp Pointer to the RLP environment.
contextspec C string containing the XML data. The string can be discarded after this function is called.
len Length of the above string.
contextpp Address of a variable of type (BT_RLP_ContextC *) to which a pointer to the newly created context will be stored.
Return values:
BT_OK The context was allocated and initialized correctly.

BT_Result BT_RLP_Environment_GetContextFromFile ( BT_RLP_EnvironmentC envp,
const char *  context_path,
BT_RLP_ContextC **  contextpp 
)

This function creates a context with the current environment configuration, based on the named context definition.

The character data in the file should be encoded in UTF-8.

Parameters:
envp Pointer to the RLP environment.
context_path Pathname to the context configuration file.
contextpp Address of the pointer for the created context.
Return values:
BT_OK The context was allocated and initialized correctly.

void BT_RLP_Environment_GetProcessorReport ( const BT_RLP_EnvironmentC envp,
char *  buffer,
BT_UInt32  buffer_size 
)

This function fills in a string with a report on the processors defined in the environment. The string is delimited with vertical bars:

      proc0|desc0|proc1|desc1|...|procN|descN||

The string will be null-terminated on return. If the string was truncated, the trailing | will be missing.

Parameters:
envp Pointer to the RLP environment
buffer C string buffer where the report is stored.
buffer_size the maximum length of the report buffer.

bool BT_RLP_Environment_HasLicenseForLanguage ( const BT_RLP_EnvironmentC envp,
BT_LanguageID  lid,
BT_UInt32  functionality 
)

This function determines if a license exists for the language and functionality.

Parameters:
envp Pointer to the RLP environment
lid Language ID.
functionality Functionality.
Return values:
true(1) There is a license for the language and functionality.
See also:
bt_language_names.h and bt_rlp_license_types.h.

bool BT_RLP_Environment_HasLicenseForNamedFeature ( const BT_RLP_EnvironmentC envp,
const char *  feature,
BT_UInt32  functionality 
)

This function determines if a license exists for the feature and functionality.

Parameters:
envp Pointer to the RLP environment.
feature Feature.
functionality Functionality.
Return values:
true There is a license for the feature and functionality.
See also:
bt_rlp_license_types.h.

bool BT_RLP_Environment_HasLicenses ( const BT_RLP_EnvironmentC envp  ) 

This function determines if there are language and feature licenses in the environment.

Parameters:
envp Pointer to the RLP environment
Return values:
true(1) There are some language and feature licenses.

BT_Result BT_RLP_Environment_InitializeFromBuffer ( BT_RLP_EnvironmentC envp,
const unsigned char *  buffer,
BT_UInt32  len 
)

This function initializes the environment from an XML configuration stored in the buffer.

Parameters:
envp Pointer to the RLP environment.
buffer The buffer containing the XML data. The buffer can be discarded after this function is called.
len The length of the buffer.
Return values:
BT_OK The environment was initialized.

BT_Result BT_RLP_Environment_InitializeFromFile ( BT_RLP_EnvironmentC envp,
const char *  pathname 
)

This function initializes the environment from the XML configuration file specified by the argument.

Parameters:
envp Pointer to the RLP environment.
pathname The pathname to the configuration file.
Return values:
BT_OK The environment was initialized.

const char* BT_RLP_Environment_RootDirectory ( void   ) 

This static function returns the current location of the rlp subdirectory of the installation directory. It returns 0 in the event that no root directory is established. Generally, RLP does not function without a setting for the Basis root directory.

Returns:
the BT_ROOT/rlp directory, or 0 if none.

void BT_RLP_Environment_SetBTRootDirectory ( const char *  root_directory_pathname  ) 

This static function establishes the root directory of a Basis Technology installation. This directory is known as BT_ROOT or the Basis root directory. RLP-specific files are found under BT_ROOT/rlp.

Parameters:
root_directory_pathname String specifying the root of the installation.

void BT_RLP_Environment_SetLogCallbackFunction ( void *  info_p,
BT_Log_callback_function  fcn_p 
)

This function specifies a callback function to receive logging data, which is encoded in UTF-8. This should be called before any other RLP function. All log channels will be muted by this call. A subsequent call to SetLogLevel should be used to control the log level.

Note:
The log message is not necessarily a complete line or message.
      static void x_log_callback(void* info_p, int channel, char const* message)
      {
          fprintf(stderr, "%d\t%s\n", channel, message);
      }

      BT_RLP_Environment_SetLogCallbackFunction(0, x_log_callback);
      BT_RLP_Environment_SetLogLevel("error");

Parameters:
info_p Void pointer stored and passed to the callback function each time it is called. The interpretation of this value is solely at the discretion of the client.
fcn_p Function pointer to the callback function, which returns void and takes a void * (the value of info_p), int (channel number), and a char const * (the message).

void BT_RLP_Environment_SetLogLevel ( const char *  log_level_string  ) 

This static function controls the level of diagnostic logging for RLP. Call it before a BT_RLP_Environment is initialized. There are three logging channels that can be enabled:

error
Fatal error messages.
warning
Non-fatal warning messages.
info
Informational messages.

By default, without calling SetLogLevel, "error" level messages will be logged. To turn off all logging, call SetLogLevel("none").

These can be combined to log multiple levels by separating each one with a comma, e.g., "error,warning". The string "all" is synonymous with "error,warning,info".

The environment variable BT_RLP_LOG_LEVEL can be set similarly to get the same behavior without explicitly calling SetLogLevel.

      BT_RLP_Environment_SetLogLevel("error,warning");

Parameters:
log_level_string String specifying which of the standard channels should be logged.

bool BT_RLP_Library_VersionIsCompatible ( unsigned long  vers  ) 

This function checks if the current version of the underlying C++ API library is compatible with the version of the library the object was compiled with.

Parameters:
vers This must be BT_RLP_LIBRARY_INTERFACE_VERSION from bt_rlp_version.h.
Returns:
true if the compatible version of the library is in use.

unsigned long BT_RLP_Library_VersionNumber ( void   ) 

This function returns the version number of the underlying C++ class library as an integer.

Returns:
The version number of the C++ library.

const char* BT_RLP_Library_VersionString ( void   ) 

This function returns the version number of the underlying C++ class library as a string.

Returns:
The version number of the C++ library.

void BT_RLP_NE_Iterator_Destroy ( BT_RLP_NE_IteratorC neitp  ) 

This function destroys the Named Entity Iterator.

Parameters:
neitp Pointer to the Named Entity iterator.

BT_UInt32 BT_RLP_NE_Iterator_GetEndCharacterOffset ( const BT_RLP_NE_IteratorC neitp  ) 

This function returns the character offset + 1 for the last character in the named entity.

Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
the character offset + 1 for the last character in the named entity.

BT_UInt32 BT_RLP_NE_Iterator_GetEndTokenIndex ( const BT_RLP_NE_IteratorC neitp  ) 

This function returns the token index + 1 of the last token in the named entity.

Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
the token index + 1 of the last token in the named entity.

BT_UInt32 BT_RLP_NE_Iterator_GetIndex ( const BT_RLP_NE_IteratorC neitp  ) 

This function returns the index of the named entity in the vector of named entities.

Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
the index of the named entity in the vector of named entities.

const BT_Char16* BT_RLP_NE_Iterator_GetNamedEntity ( const BT_RLP_NE_IteratorC neitp  ) 

This function returns normalized text of the current Named Entity.

Note:
If no normalized token is available, the ordinary token will be returned. If the original text had whitespace between the tokens, it is replaced by a single space.
Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
Normalized text of the named entity.

const BT_Char16* BT_RLP_NE_Iterator_GetRawNamedEntity ( const BT_RLP_NE_IteratorC neitp  ) 

This function returns the Named Entity as it occurred in the text.

Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
The named entity as it occurred in the text

BT_UInt32 BT_RLP_NE_Iterator_GetStartCharacterOffset ( const BT_RLP_NE_IteratorC neitp  ) 

This function returns the character offset for the first character in the named entity.

Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
the character offset for the first character in the named entity.

BT_UInt32 BT_RLP_NE_Iterator_GetStartTokenIndex ( const BT_RLP_NE_IteratorC neitp  ) 

This function returns the token index of the first token in the named entity.

Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
the token index of the first token in the named entity.

BT_UInt32 BT_RLP_NE_Iterator_GetType ( const BT_RLP_NE_IteratorC neitp  ) 

This function returns the Named Entity type.

Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
Type of the current named entity.
See also:
<bt_rlp_ne_types.h>

bool BT_RLP_NE_Iterator_Next ( BT_RLP_NE_IteratorC neitp  ) 

This function advances the iterator to the next named entity.

Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
true if successful. false if at the last named entity.

BT_UInt32 BT_RLP_NE_Iterator_Size ( const BT_RLP_NE_IteratorC neitp  ) 

This function returns the total number of named entities.

Parameters:
neitp Pointer to the Named Entity iterator.
Returns:
The total number of named entities.

BT_RLP_NE_IteratorFactoryC* BT_RLP_NE_IteratorFactory_Create (  ) 

This function creates a Named Entity iterator factory. An NE iterator factory is created with the default properties.

Returns:
Pointer to the factory. 0 if operator new is incapable of allocating memory for the factory.

BT_RLP_NE_IteratorC* BT_RLP_NE_IteratorFactory_CreateIterator ( BT_RLP_NE_IteratorFactoryC nifp,
BT_RLP_ContextC contextp 
)

This function returns a new Named Entity token iterator. The new iterator will iterate over the tokens of the supplied context.

Parameters:
nifp Pointer to the Named Entity iterator factory.
contextp Pointer to the context to iterate over.
Returns:
Pointer to the Named Entity iterator.

void BT_RLP_NE_IteratorFactory_Destroy ( BT_RLP_NE_IteratorFactoryC nifp  ) 

This function destroys a Named Entity iterator factory.

Parameters:
nifp Pointer to the Named Entity iterator factory.

bool BT_RLP_NE_IteratorFactory_GetConsistentType ( const BT_RLP_NE_IteratorFactoryC nifp  ) 

This function gets the flag indicating whether the iterator should return consistent named entity type for the same normalized named entities.

Parameters:
nifp Pointer to the Named Entity iterator factory.
Returns:
true if consistent named entity type is enabled, false otherwise.

bool BT_RLP_NE_IteratorFactory_GetStripAffixes ( const BT_RLP_NE_IteratorFactoryC nifp  ) 

This function gets the flag indicating whether the iterator should remove affixes from named entities when returning them.

Parameters:
nifp Pointer to the Named Entity iterator factory.
Returns:
true if stripping is enabled, false otherwise.

void BT_RLP_NE_IteratorFactory_SetConsistentType ( BT_RLP_NE_IteratorFactoryC nifp,
bool  flag 
)

Sets the flag indicating whether the iterator should return consistent named entity type for the same normalized named entities.

Parameters:
nifp Pointer to the Named Entity iterator factory.
flag Set to "true" to enforce consistent named entity type.

void BT_RLP_NE_IteratorFactory_SetStripAffixes ( BT_RLP_NE_IteratorFactoryC nifp,
bool  strip 
)

This function sets the flag indicating whether the iterator should remove affixes from named entities when returning them.

Parameters:
nifp Pointer to the Named Entity iterator factory.
strip Specifies whether or not to strip the affixes.

const BT_Char16* BT_RLP_Result_AsCountedUTF16String ( const BT_RLP_ResultC *  resp,
BT_UInt32 *  lenp 
)

This function returns a pointer to a non-null-terminated UTF-16 encoded Unicode string and its length. Applies to BT_RLP_RAW_TEXT and BT_RLP_TRANSCRIBED_TEXT.

You should not modify the contents of the returned string.

Parameters:
resp Pointer to the result.
lenp Pointer to the variable to which the length will be stored.
Returns:
Constant pointer to the string, not terminated by 0.

void BT_RLP_Result_AsIntegerPair ( const BT_RLP_ResultC *  resp,
BT_UInt32 *  ap,
BT_UInt32 *  bp 
)

This function returns a pair of 32-bit unsigned integer values. It is used for results that are returned as integral 2-tuples. Applies to BT_RLP_TOKEN_OFFSET and BT_RLP_BASE_NOUN_PHRASE.

Parameters:
resp Pointer to the result.
ap Pointer to the variable to hold the first value.
bp Pointer to the variable to hold the second value.

void BT_RLP_Result_AsIntegerTriple ( const BT_RLP_ResultC *  resp,
BT_UInt32 *  ap,
BT_UInt32 *  bp,
BT_UInt32 *  cp 
)

This function returns a triple of 32-bit unsigned integer values. It is used for results that are returned as integral 3-tuples. Applies to BT_RLP_NAMED_ENTITY and BT_RLP_SCRIPT_REGION.

Parameters:
resp Pointer to the result.
ap Pointer to the variable to hold the first value.
bp Pointer to the variable to hold the second value.
cp Pointer to the variable to hold the third value.

void BT_RLP_Result_AsIntegerUTF16StringVectorPair ( const BT_RLP_ResultC *  resp,
BT_UInt32 *  indexp,
const BT_RLP_Result_UTF16StringVectorC **  stringspp 
)

This function returns a pair consisting of an index and a vector of UTF-16 encoded Unicode strings. Applies to BT_RLP_COMPOUND, BT_RLP_READING, BT_RLP_TOKEN_VARIATIONS, BT_RLP_ALTERNATIVE_LEMMAS, BT_RLP_ALTERNATIVE_NORM, BT_RLP_ALTERNATIVE_ROOTS, BT_RLP_ALTERNATIVE_STEMS.

You should not modify the contents of the returned strings.

Parameters:
resp Pointer to the result.
indexp Address of the variable to hold the returned index value.
stringspp Pointer to a variable to which a pointer to the string vector will be stored.

BT_Int32 BT_RLP_Result_AsSignedInteger ( const BT_RLP_ResultC resp  ) 

This function returns a signed 32-bit integer value. Reserved for future use.

Parameters:
resp Pointer to the result.
Returns:
The integer value.

const BT_Char8* BT_RLP_Result_AsString ( const BT_RLP_ResultC resp  ) 

This function returns a pointer to a null-terminated 8-bit character string. No particular encoding is specified and is defined by the processor generating the result. Applies to BT_RLP_PART_OF_SPEECH, BT_RLP_DETECTED_ENCODING, and BT_RLP_MIME_TYPE.

You should not modify the contents of the returned string.

Parameters:
resp Pointer to the result.
Returns:
Constant pointer to the string.

BT_UInt32 BT_RLP_Result_AsUnsignedInteger ( const BT_RLP_ResultC resp  ) 

This function returns an unsigned 32-bit integer value. Applies to BT_RLP_DETECTED_LANGUAGE, BT_RLP_STOPWORD, BT_RLP_SENTENCE_BOUNDARY, BT_RLP_TEXT_BOUNDARIES, BT_RLP_MAP_OFFSETS, BT_RLP_TOKEN_SOURCE_ID.

Parameters:
resp Pointer to the result.
Returns:
The integer value.

void BT_RLP_Result_AsUnsignedIntegerVector ( const BT_RLP_ResultC *  resp,
BT_UInt32 *  vector,
BT_UInt32  size 
)

This function returns a vector of 32-bit unsigned integer values. It generalizes AsIntegerPair and AsIntegerTriple, and supports processors that create data values organized as larger vectors of integers. Applies to BT_RLP_LANGUAGE_REGION (6 integers).

Parameters:
resp Pointer to the result.
vector A pointer to a vector of integers to hold the values.
size The number of slots in 'vector'. If this is smaller than the natural number of items for an iteration, the code fills the available slots and returns no indication of truncation.

const BT_Char16* BT_RLP_Result_AsUTF16String ( const BT_RLP_ResultC resp  ) 

This function returns a pointer to a null-terminated UTF-16 encoded Unicode string. Applies to BT_RLP_TOKEN, BT_RLP_TOKEN_SOURCE_NAME, BT_RLP_STEM, BT_RLP_LEMMA, BT_RLP_MANY_TO_ONE_VARIANT_TOKEN, BT_RLP_NORMALIZED_TOKEN, BT_RLP_ROOTS, BT_RLP_GAZETTEER_NAMES.

You should not modify the contents of the returned string.

Parameters:
resp Pointer to the result.
Returns:
Constant pointer to the string.

const BT_Char16* BT_RLP_Result_UTF16StringVector_Get ( const BT_RLP_Result_UTF16StringVectorC u16vec,
BT_UInt32  index 
)

This function retrieves the indexed string from the vector.

Parameters:
u16vec The vector
index Index of the element of the vector to be retrieved.
Returns:
The UTF-16 string as an array of Char16 elements with the last element being 0.

BT_UInt32 BT_RLP_Result_UTF16StringVector_Size ( const BT_RLP_Result_UTF16StringVectorC u16vec  ) 

This function examines the number of strings in the UTF-16 string vector.

Parameters:
u16vec The vector.
Returns:
The number of UTF-16 strings stored in this vector.

const BT_RLP_ResultC* BT_RLP_ResultIterator_At ( BT_RLP_ResultIteratorC resitp,
size_t  index 
)

This function returns the result at index in the set. The pointer returned is valid until the next call to the BT_RLP_ResultIterator_Next() or BT_RLP_ResultIterator_At() function. You should neither deallocate nor store it permanently.

Parameters:
resitp Pointer to the result iterator.
index the location in the set of the result you wish to retrieve
Returns:
The result specified.

const BT_RLP_ResultC* BT_RLP_ResultIterator_Next ( BT_RLP_ResultIteratorC resitp  ) 

This function returns the next result in the set. The pointer returned is valid until the next call to the BT_RLP_ResultIterator_Next() or BT_RLP_ResultIterator_At() function. You should neither deallocate nor store it permanently.

Parameters:
resitp Pointer to the result iterator.
Returns:
The next result in the set.

void BT_RLP_ResultIterator_Reset ( BT_RLP_ResultIteratorC resitp  ) 

This function resets the iterator to the beginning so that a subsequent call to BT_RLP_ResultIterator_Next() will return the first element in the result.

Parameters:
resitp Pointer to the result iterator.

BT_UInt32 BT_RLP_ResultIterator_Size ( const BT_RLP_ResultIteratorC resitp  ) 

This function returns the number of results available to the iterator.

Parameters:
resitp Pointer to the result iterator.
Returns:
The number of available results.

BT_RLP_ResultType BT_RLP_ResultIterator_Type ( const BT_RLP_ResultIteratorC resitp  ) 

This function returns the type of the result that is returned by this iterator.

Parameters:
resitp Pointer to the result iterator.
Returns:
The entity type returned by this iterator.

void BT_RLP_TokenIterator_Destroy ( BT_RLP_TokenIteratorC tkitp  ) 

This function destroys a token iterator.

Parameters:
tkitp Pointer to the token iterator to be destroyed.

const BT_Char16* BT_RLP_TokenIterator_GetCompoundComponent ( const BT_RLP_TokenIteratorC tkitp,
BT_UInt32  index 
)

This function returns the Nth compound component.

Parameters:
tkitp Pointer to the token iterator.
index Index of the component to be retrieved.

const BT_Char16* BT_RLP_TokenIterator_GetDictionayForm ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the data from BT_RLP_STEM.

Deprecated:
Deprecated in 5.4.1. Use BT_RLP_TokenIterator_GetStemForm.
Parameters:
tkitp Pointer to the token iterator.

BT_UInt32 BT_RLP_TokenIterator_GetEndOffset ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the end offset for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET).

Parameters:
tkitp Pointer to the token iterator.

BT_UInt32 BT_RLP_TokenIterator_GetIndex ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the index of the token in the stream of tokens (BT_RLP_TOKEN).

Parameters:
tkitp Pointer to the token iterator.

const BT_Char16* BT_RLP_TokenIterator_GetLemmaForm ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the data from BT_RLP_LEMMA (for tokens) or BT_RLP_ALTERNATIVE_LEMMAS (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.

Parameters:
tkitp Pointer to the token iterator.

const BT_Char16* BT_RLP_TokenIterator_GetManyToOneNormalizedForm ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the data from BT_RLP_MANY_TO_ONE_NORMALIZED_TOKEN. May be an empty string; 0 if this result type is not returned for the language being processed.

Parameters:
tkitp Pointer to the token iterator.

const BT_Char16* BT_RLP_TokenIterator_GetNormalForm ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the data from BT_RLP_NORMALIZED_TOKEN (for tokens) or BT_RLP_ALTERNATIVE_NORM (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.

Parameters:
tkitp Pointer to the token iterator.

BT_UInt32 BT_RLP_TokenIterator_GetNumberOfAnalyses ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the number of alternative analyses for the token.

Parameters:
tkitp Pointer to the token iterator.

BT_UInt32 BT_RLP_TokenIterator_GetNumberOfCompoundComponents ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the number of compound components for the token. (BT_RLP_COMPOUND)

Parameters:
tkitp Pointer to the token iterator.

BT_UInt32 BT_RLP_TokenIterator_GetNumberOfReadings ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the number of readings for the token (BT_RLP_READING).

Parameters:
tkitp Pointer to the token iterator.

const char* BT_RLP_TokenIterator_GetPartOfSpeech ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the part-of-speech tag for the token (BT_RLP_PART_OF_SPEECH) or analysis (BT_RLP_ALTERNATIVE_PARTS_OF_SPEECH).

Parameters:
tkitp Pointer to the token iterator.

const BT_Char16* BT_RLP_TokenIterator_GetReading ( const BT_RLP_TokenIteratorC tkitp,
BT_UInt32  index 
)

This function retrieves the Nth reading. The Basic Linguistic module may generate more than one possible reading for a token. Use this function to obtain each possible reading.

Parameters:
tkitp Pointer to the token iterator.
index Index of the reading to be retrieved.

const BT_Char16* BT_RLP_TokenIterator_GetRootForm ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the data from BT_RLP_ROOTS (for tokens) or BT_RLP_ALTERNATIVE_ROOTS (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.

Parameters:
tkitp Pointer to the token iterator.

BT_UInt32 BT_RLP_TokenIterator_GetSourceId ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the data from BT_RLP_TOKEN_SOURCE_ID.

Parameters:
tkitp Pointer to the token iterator.

const BT_Char16* BT_RLP_TokenIterator_GetSourceName ( const BT_RLP_TokenIteratorC tkitp,
BT_UInt32  id 
)

This function returns the data from BT_RLP_TOKEN_SOURCE_NAME.

Parameters:
tkitp Pointer to the token iterator.
id specifies the ID of the source name

BT_UInt32 BT_RLP_TokenIterator_GetStartOffset ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the start offset for the token in the UTF-16 text (BT_RLP_TOKEN_OFFSET).

Parameters:
tkitp Pointer to the token iterator.

const BT_Char16* BT_RLP_TokenIterator_GetStemForm ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the data from BT_RLP_STEM (for tokens) or BT_RLP_ALTERNATIVE_STEMS (for analyses). May be an empty string; 0 if this result type is not returned for the language being processed.

Parameters:
tkitp Pointer to the token iterator.

const BT_Char16* BT_RLP_TokenIterator_GetToken ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the data from BT_RLP_TOKEN.

Parameters:
tkitp Pointer to the token iterator.

bool BT_RLP_TokenIterator_IsStopword ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns whether the token is marked as a stopword (BT_RLP_STOPWORD).

Parameters:
tkitp Pointer to the token iterator.

bool BT_RLP_TokenIterator_Next ( BT_RLP_TokenIteratorC tkitp  ) 

This function advances the token iterator to the next token.

Parameters:
tkitp Pointer to the token iterator.

bool BT_RLP_TokenIterator_NextAnalysis ( BT_RLP_TokenIteratorC tkitp  ) 

This function checks if there are more alternative analyses. The Arabic Basic Linguistic module may generate more than one possible analysis for a token. Use this function to obtain each possible analysis.

Parameters:
tkitp Pointer to the token iterator.

BT_UInt32 BT_RLP_TokenIterator_Size ( const BT_RLP_TokenIteratorC tkitp  ) 

This function returns the total number of tokens.

Parameters:
tkitp Pointer to the token iterator.

BT_RLP_TokenIteratorFactoryC* BT_RLP_TokenIteratorFactory_Create ( void   ) 

This function creates a token iterator factory with the default properties.

Returns:
The factory. This will return 0 only if operator new is incapable of allocating memory for the factory.

BT_RLP_TokenIteratorC* BT_RLP_TokenIteratorFactory_CreateIterator ( BT_RLP_TokenIteratorFactoryC tkitfacp,
const BT_RLP_ContextC contextp 
)

This function creates a new token iterator and returns a pointer to the BT_RLP_TokenIteratorC structure that represents the token iterator.

Parameters:
tkitfacp Pointer to the factory.
contextp Pointer to the context to iterate over.
Returns:
Pointer to a new BT_RLP_TokenIteratorC object.
See also:
BT_RLP_TokenIteratorC

void BT_RLP_TokenIteratorFactory_Destroy ( BT_RLP_TokenIteratorFactoryC tkitfacp  ) 

This function destroys a token iterator factory.

Parameters:
tkitfacp Pointer to the factory to be destroyed.

bool BT_RLP_TokenIteratorFactory_GetReturnCompoundComponents ( const BT_RLP_TokenIteratorFactoryC tkitfacp  ) 

This function returns the setting for compound components.

Parameters:
tkitfacp Pointer to the factory.

bool BT_RLP_TokenIteratorFactory_GetReturnReadings ( const BT_RLP_TokenIteratorFactoryC tkitfacp  ) 

This function returns the setting for readings.

Parameters:
tkitfacp Pointer to the factory.

void BT_RLP_TokenIteratorFactory_SetReturnCompoundComponents ( BT_RLP_TokenIteratorFactoryC tkitfacp,
bool  flag 
)

This function specifies whether iterators from this factory will retrieve and return compound components.

Parameters:
tkitfacp Pointer to the factory.
flag Whether the iterators should read out and return compound components.

void BT_RLP_TokenIteratorFactory_SetReturnReadings ( BT_RLP_TokenIteratorFactoryC tkitfacp,
bool  flag 
)

This function specifies whether iterators from this factory will retrieve and return readings.

Parameters:
tkitfacp Pointer to the factory.
flag Whether the iterators should read out and return readings.


Copyright © 2004-2008 Basis Technology Corporation. All Rights Reserved.