35 Context(
const char *package_path);
38 void prefill(
ggma_token *tokens,
size_t n_tokens, std::vector<uint8_t> &hidden_state);
/// @brief Un-embedding step from a hidden-state buffer to logits.
///
/// @param hidden_state Byte buffer taken by non-const reference.
///        NOTE(review): presumably the input hidden state; whether it is
///        modified cannot be told from the declaration — confirm.
/// @param n_tokens     Token count associated with `hidden_state`.
/// @param logits       Float buffer taken by non-const reference.
///        NOTE(review): presumably filled with the resulting logits; its
///        expected size on entry/exit is not visible here — confirm.
void unemb(std::vector<uint8_t> &hidden_state, size_t n_tokens, std::vector<float> &logits);
46 template <
bool ReturnLogits,
typename OutputType>
47 void decode_impl(
ggma_token token_id, OutputType &output);
/// Package path held as an owning string.
/// NOTE(review): presumably a copy of the constructor's `package_path` — confirm.
std::string _package_path;
GGMAConfig load_config(const std::string &package_path)
void prefill(ggma_token *tokens, size_t n_tokens, std::vector< uint8_t > &hidden_state)
GGMA_STATUS generate(ggma_token *tokens, size_t n_tokens, size_t n_tokens_max, size_t *n_predict)
void decode(ggma_token token_id, std::vector< uint8_t > &hidden_state)
ggma_token sample(const std::vector< float > &logits)
void unemb(std::vector< uint8_t > &hidden_state, size_t n_tokens, std::vector< float > &logits)