53{
54 if (!text || !tokens || !n_tokens || max_tokens == 0 || !_processor)
55 {
56 if (n_tokens)
57 *n_tokens = 0;
58 return 0;
59 }
60
61 std::string input_text(text, text_len);
62 std::vector<int> piece_ids;
63
64 auto status = _processor->Encode(input_text, &piece_ids);
65 if (!status.ok())
66 {
67 if (n_tokens)
68 *n_tokens = 0;
69 return 0;
70 }
71
72
73 std::fill(tokens, tokens + max_tokens, 0);
74
75
76 int bos_id = _processor->bos_id();
77 size_t bos_offset = 0;
78
79
80 if (bos_id >= 0 && max_tokens > 0)
81 {
82 tokens[0] = bos_id;
83 bos_offset = 1;
84 }
85
86 size_t available_space = max_tokens - bos_offset;
87 size_t token_count = std::min(piece_ids.size(), available_space);
88
89 for (size_t i = 0; i < token_count; ++i)
90 tokens[bos_offset + i] = static_cast<int32_t>(piece_ids[i]);
91
92 *n_tokens = bos_offset + token_count;
93 return bos_offset + token_count;
94}