ONE/ggma__tokenize_8cc_source.html

/*

 * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved

 *

 * Licensed under the Apache License, Version 2.0 (the "License");

 * you may not use this file except in compliance with the License.

 * You may obtain a copy of the License at

 *

 *      http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */


#include "ggma_tokenize.h"

#include "ggma_types.h"

#include "tokenize/TokenizerFactory.h"


#include <string>


extern "C" {


/**
 * @brief Creates a GGMA tokenizer from a specified tokenizer path.
 *
 * The tokenizer kind is fixed to "sentencepiece"; @p tokenizer_path is passed to
 * ggma::TokenizerFactory::create() as its tokenizer directory argument.
 *
 * @param[out] tokenizer      Location that receives the opaque handle. It is written whenever
 *                            the factory returns (with a null handle if the factory produced
 *                            nothing) and is not written if the factory throws.
 * @param[in]  tokenizer_path Path forwarded to the factory.
 *
 * @return GGMA_STATUS_UNEXPECTED_NULL if either argument is null,
 *         GGMA_STATUS_ERROR if the factory throws or returns a null tokenizer,
 *         GGMA_STATUS_NO_ERROR otherwise.
 */
GGMA_STATUS ggma_create_tokenizer(ggma_tokenizer **tokenizer, const char *tokenizer_path)
{
  if (!tokenizer || !tokenizer_path)
    return GGMA_STATUS_UNEXPECTED_NULL;

  // No C++ exception may escape through this C entry point, hence the catch-all.
  try
  {
    const std::string tokenizer_id = "sentencepiece";
    auto impl = ggma::TokenizerFactory::create(tokenizer_id, tokenizer_path);

    // The handle is an opaque alias for the implementation pointer.
    *tokenizer = reinterpret_cast<ggma_tokenizer *>(impl);

    // The factory returns a raw pointer, so it may be null (presumably when no tokenizer
    // could be built - confirm against TokenizerFactory). Do not report that as success.
    return impl ? GGMA_STATUS_NO_ERROR : GGMA_STATUS_ERROR;
  }
  catch (...)
  {
    return GGMA_STATUS_ERROR;
  }
}


/**
 * @brief Frees all resources associated with a GGMA tokenizer.
 *
 * The opaque handle is cast back to ggma::Tokenizer and deleted.
 *
 * @param[in] tokenizer Handle to destroy.
 *
 * @return GGMA_STATUS_UNEXPECTED_NULL if @p tokenizer is null,
 *         GGMA_STATUS_ERROR if an exception is caught while deleting,
 *         GGMA_STATUS_NO_ERROR otherwise.
 */
GGMA_STATUS ggma_free_tokenizer(ggma_tokenizer *tokenizer)
{
  if (tokenizer == nullptr)
  {
    return GGMA_STATUS_UNEXPECTED_NULL;
  }

  GGMA_STATUS status = GGMA_STATUS_NO_ERROR;
  try
  {
    // Undo the cast applied when the handle was handed out, then destroy the object.
    delete reinterpret_cast<ggma::Tokenizer *>(tokenizer);
  }
  catch (...)
  {
    // Keep any C++ exception from crossing the C boundary.
    status = GGMA_STATUS_ERROR;
  }
  return status;
}


/**
 * @brief Tokenizes an input text string into a sequence of token IDs.
 *
 * All arguments after the handle are forwarded unchanged to ggma::Tokenizer::tokenize().
 * Whatever that call returns, if anything, is not inspected here.
 *
 * @param[in]  tokenizer    Opaque tokenizer handle.
 * @param[in]  text         Input text buffer.
 * @param[in]  text_len     Length passed along with @p text.
 * @param[out] tokens       Destination buffer for token IDs.
 * @param[in]  n_tokens_max Presumably the capacity of @p tokens - confirm against Tokenizer.
 * @param[out] n_tokens     Presumably receives the produced token count - confirm against
 *                          Tokenizer.
 *
 * @return GGMA_STATUS_UNEXPECTED_NULL if any pointer argument is null,
 *         GGMA_STATUS_ERROR if the underlying call throws,
 *         GGMA_STATUS_NO_ERROR otherwise.
 */
GGMA_STATUS ggma_tokenize(const ggma_tokenizer *tokenizer, const char *text, size_t text_len,
                          int32_t *tokens, size_t n_tokens_max, size_t *n_tokens)
{
  const bool all_pointers_set = tokenizer && text && tokens && n_tokens;
  if (!all_pointers_set)
  {
    return GGMA_STATUS_UNEXPECTED_NULL;
  }

  GGMA_STATUS status = GGMA_STATUS_NO_ERROR;
  try
  {
    // The handle is an opaque alias for the implementation object.
    const auto *backend = reinterpret_cast<const ggma::Tokenizer *>(tokenizer);
    backend->tokenize(text, text_len, tokens, n_tokens_max, n_tokens);
  }
  catch (...)
  {
    // Keep any C++ exception from crossing the C boundary.
    status = GGMA_STATUS_ERROR;
  }
  return status;
}


/**
 * @brief Detokenizes a sequence of token IDs back into a text string.
 *
 * All arguments after the handle are forwarded unchanged to ggma::Tokenizer::detokenize().
 * Whatever that call returns, if anything, is not inspected here.
 *
 * @param[in]  tokenizer Opaque tokenizer handle.
 * @param[in]  tokens    Token IDs to convert.
 * @param[in]  n_tokens  Count passed along with @p tokens.
 * @param[out] text      Destination character buffer.
 * @param[in]  text_len  Presumably the capacity of @p text - confirm against Tokenizer.
 *
 * @return GGMA_STATUS_UNEXPECTED_NULL if any pointer argument is null,
 *         GGMA_STATUS_ERROR if the underlying call throws,
 *         GGMA_STATUS_NO_ERROR otherwise.
 */
GGMA_STATUS ggma_detokenize(const ggma_tokenizer *tokenizer, const int32_t *tokens, size_t n_tokens,
                            char *text, size_t text_len)
{
  const bool all_pointers_set = tokenizer && tokens && text;
  if (!all_pointers_set)
  {
    return GGMA_STATUS_UNEXPECTED_NULL;
  }

  GGMA_STATUS status = GGMA_STATUS_NO_ERROR;
  try
  {
    // The handle is an opaque alias for the implementation object.
    const auto *backend = reinterpret_cast<const ggma::Tokenizer *>(tokenizer);
    backend->detokenize(tokens, n_tokens, text, text_len);
  }
  catch (...)
  {
    // Keep any C++ exception from crossing the C boundary.
    status = GGMA_STATUS_ERROR;
  }
  return status;
}


} // extern "C"

TokenizerFactory.h

ggma::TokenizerFactory::create
static Tokenizer * create(const std::string &id, const std::string &tokenizer_dir)
Definition TokenizerFactory.cc:29

ggma::Tokenizer
Definition Tokenizer.h:26

ggma_create_tokenizer
GGMA_STATUS ggma_create_tokenizer(ggma_tokenizer **tokenizer, const char *tokenizer_path)
Creates a GGMA tokenizer from a specified tokenizer path.
Definition ggma_tokenize.cc:25

ggma_free_tokenizer
GGMA_STATUS ggma_free_tokenizer(ggma_tokenizer *tokenizer)
Frees all resources associated with a GGMA tokenizer.
Definition ggma_tokenize.cc:44

ggma_detokenize
GGMA_STATUS ggma_detokenize(const ggma_tokenizer *tokenizer, const int32_t *tokens, size_t n_tokens, char *text, size_t text_len)
Detokenizes a sequence of token IDs back into a text string.
Definition ggma_tokenize.cc:79

ggma_tokenize
GGMA_STATUS ggma_tokenize(const ggma_tokenizer *tokenizer, const char *text, size_t text_len, int32_t *tokens, size_t n_tokens_max, size_t *n_tokens)
Tokenizes an input text string into a sequence of token IDs.
Definition ggma_tokenize.cc:61

ggma_tokenize.h

ggma_tokenizer
struct ggma_tokenizer ggma_tokenizer
Opaque handle to a GGMA tokenizer.
Definition ggma_tokenize.h:36

ggma_types.h
This file defines the core types and status codes for GGMA API.

GGMA_STATUS
GGMA_STATUS
Enumeration of status codes returned by GGMA API functions.
Definition ggma_types.h:35

GGMA_STATUS_NO_ERROR
@ GGMA_STATUS_NO_ERROR
Definition ggma_types.h:37

GGMA_STATUS_UNEXPECTED_NULL
@ GGMA_STATUS_UNEXPECTED_NULL
Definition ggma_types.h:44

GGMA_STATUS_ERROR
@ GGMA_STATUS_ERROR
Definition ggma_types.h:42

impl
Definition Mean.cpp:30