ONE/runtime_2ggma_2src_2_context_8h_source.html

/*

 * Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved

 *

 * Licensed under the Apache License, Version 2.0 (the "License");

 * you may not use this file except in compliance with the License.

 * You may obtain a copy of the License at

 *

 *      http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */


#ifndef __GGMA_CONTEXT_H__

#define __GGMA_CONTEXT_H__


#include "ggma_types.h"

#include "Config.h"

#include "KVCache.h"


#include <cstdint>

#include <memory>

#include <string>

#include <vector>


namespace ggma

{


class Context

{

public:

  Context(const char *package_path);

  GGMAConfig load_config(const std::string &package_path);


  void prefill(ggma_token *tokens, size_t n_tokens, std::vector<uint8_t> &hidden_state);

  void unemb(std::vector<uint8_t> &hidden_state, size_t n_tokens, std::vector<float> &logits);

  ggma_token sample(const std::vector<float> &logits);

  void decode(ggma_token token_id, std::vector<uint8_t> &hidden_state);

  void decode(ggma_token token_id, std::vector<float> &logits);


private:

  // Template implementation to eliminate code duplication

  template <bool ReturnLogits, typename OutputType>

  void decode_impl(ggma_token token_id, OutputType &output);

  void init_kv_cache();


public:

  ~Context() = default;


  GGMA_STATUS generate(ggma_token *tokens, size_t n_tokens, size_t n_tokens_max, size_t *n_predict);


private:

  std::string _package_path;

  ggma::GGMAConfig _cfg;

  ggma::KVCache _cache;

};


} // namespace ggma


#endif // __GGMA_CONTEXT_H__

KVCache.h

ggma::Context
Definition Context.h:33

ggma::Context::load_config
GGMAConfig load_config(const std::string &package_path)
Definition Context.cc:84

ggma::Context::prefill
void prefill(ggma_token *tokens, size_t n_tokens, std::vector< uint8_t > &hidden_state)
Definition Context.cc:95

ggma::Context::generate
GGMA_STATUS generate(ggma_token *tokens, size_t n_tokens, size_t n_tokens_max, size_t *n_predict)
Definition Generate.cc:39

ggma::Context::decode
void decode(ggma_token token_id, std::vector< uint8_t > &hidden_state)
Definition Context.cc:276

ggma::Context::sample
ggma_token sample(const std::vector< float > &logits)
Definition Context.cc:294

ggma::Context::unemb
void unemb(std::vector< uint8_t > &hidden_state, size_t n_tokens, std::vector< float > &logits)
Definition Context.cc:161

ggma::Context::~Context
~Context()=default

ggma_types.h
This file defines the core types and status codes for the GGMA API.

GGMA_STATUS
GGMA_STATUS
Enumeration of status codes returned by GGMA API functions.
Definition ggma_types.h:35

ggma_token
int32_t ggma_token
Definition ggma_types.h:53

ggma
Definition Config.cc:24

Config.h

ggma::GGMAConfig
Definition Config.h:66

ggma::KVCache
Definition KVCache.h:44