27 if (!tokenizer || !tokenizer_path)
32 std::string tokenizer_id =
"sentencepiece";
62 int32_t *tokens,
size_t n_tokens_max,
size_t *n_tokens)
64 if (!tokenizer || !text || !tokens || !n_tokens)
70 impl->tokenize(text, text_len, tokens, n_tokens_max, n_tokens);
80 char *text,
size_t text_len)
82 if (!tokenizer || !tokens || !text)
88 impl->detokenize(tokens, n_tokens, text, text_len);
static Tokenizer * create(const std::string &id, const std::string &tokenizer_dir)
GGMA_STATUS ggma_create_tokenizer(ggma_tokenizer **tokenizer, const char *tokenizer_path)
Creates a GGMA tokenizer from a specified tokenizer path.
GGMA_STATUS ggma_free_tokenizer(ggma_tokenizer *tokenizer)
Frees all resources associated with a GGMA tokenizer.
GGMA_STATUS ggma_detokenize(const ggma_tokenizer *tokenizer, const int32_t *tokens, size_t n_tokens, char *text, size_t text_len)
Detokenizes a sequence of token IDs back into a text string.
GGMA_STATUS ggma_tokenize(const ggma_tokenizer *tokenizer, const char *text, size_t text_len, int32_t *tokens, size_t n_tokens_max, size_t *n_tokens)
Tokenizes an input text string into a sequence of token IDs.
struct ggma_tokenizer ggma_tokenizer
Opaque handle to a GGMA tokenizer.
This file defines the core types and status codes for the GGMA API.
GGMA_STATUS
Enumeration of status codes returned by GGMA API functions.
@ GGMA_STATUS_UNEXPECTED_NULL