ONE - On-device Neural Engine
Loading...
Searching...
No Matches
ggma_tokenize.cc
Go to the documentation of this file.
/*
 * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "ggma_tokenize.h"
18#include "ggma_types.h"
20
21#include <string>
22
23extern "C" {
24
25GGMA_STATUS ggma_create_tokenizer(ggma_tokenizer **tokenizer, const char *tokenizer_path)
26{
27 if (!tokenizer || !tokenizer_path)
29
30 try
31 {
32 std::string tokenizer_id = "sentencepiece";
33 auto impl = ggma::TokenizerFactory::create(tokenizer_id, tokenizer_path);
34
35 *tokenizer = reinterpret_cast<ggma_tokenizer *>(impl);
37 }
38 catch (...)
39 {
40 return GGMA_STATUS_ERROR;
41 }
42}
43
45{
46 if (!tokenizer)
48
49 try
50 {
51 auto impl = reinterpret_cast<ggma::Tokenizer *>(tokenizer);
52 delete impl;
54 }
55 catch (...)
56 {
57 return GGMA_STATUS_ERROR;
58 }
59}
60
61GGMA_STATUS ggma_tokenize(const ggma_tokenizer *tokenizer, const char *text, size_t text_len,
62 int32_t *tokens, size_t n_tokens_max, size_t *n_tokens)
63{
64 if (!tokenizer || !text || !tokens || !n_tokens)
66
67 try
68 {
69 auto impl = reinterpret_cast<const ggma::Tokenizer *>(tokenizer);
70 impl->tokenize(text, text_len, tokens, n_tokens_max, n_tokens);
72 }
73 catch (...)
74 {
75 return GGMA_STATUS_ERROR;
76 }
77}
78
79GGMA_STATUS ggma_detokenize(const ggma_tokenizer *tokenizer, const int32_t *tokens, size_t n_tokens,
80 char *text, size_t text_len)
81{
82 if (!tokenizer || !tokens || !text)
84
85 try
86 {
87 auto impl = reinterpret_cast<const ggma::Tokenizer *>(tokenizer);
88 impl->detokenize(tokens, n_tokens, text, text_len);
90 }
91 catch (...)
92 {
93 return GGMA_STATUS_ERROR;
94 }
95}
96
97} // extern "C"
static Tokenizer * create(const std::string &id, const std::string &tokenizer_dir)
GGMA_STATUS ggma_create_tokenizer(ggma_tokenizer **tokenizer, const char *tokenizer_path)
Creates a GGMA tokenizer from a specified tokenizer path.
GGMA_STATUS ggma_free_tokenizer(ggma_tokenizer *tokenizer)
Frees all resources associated with a GGMA tokenizer.
GGMA_STATUS ggma_detokenize(const ggma_tokenizer *tokenizer, const int32_t *tokens, size_t n_tokens, char *text, size_t text_len)
Detokenizes a sequence of token IDs back into a text string.
GGMA_STATUS ggma_tokenize(const ggma_tokenizer *tokenizer, const char *text, size_t text_len, int32_t *tokens, size_t n_tokens_max, size_t *n_tokens)
Tokenizes an input text string into a sequence of token IDs.
struct ggma_tokenizer ggma_tokenizer
Opaque handle to a GGMA tokenizer.
This file defines the core types and status codes for GGMA API.
GGMA_STATUS
Enumeration of status codes returned by GGMA API functions.
Definition ggma_types.h:35
@ GGMA_STATUS_NO_ERROR
Definition ggma_types.h:37
@ GGMA_STATUS_UNEXPECTED_NULL
Definition ggma_types.h:44
@ GGMA_STATUS_ERROR
Definition ggma_types.h:42
Definition Mean.cpp:30