Commit f87e6ac

Merge branch 'feature/llama_supports_rpc' of github.com:martindevans/llama.cpp into feature/llama_supports_rpc

2 parents: b67c0df + 9b15621

File tree

1 file changed (+46 −23 lines)

llama.h

Lines changed: 46 additions & 23 deletions
```diff
@@ -97,7 +97,7 @@ extern "C" {
         LLAMA_ROPE_TYPE_GLM  =  4,
     };
 
-    enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
+    enum llama_token_type {
         LLAMA_TOKEN_TYPE_UNDEFINED    = 0,
         LLAMA_TOKEN_TYPE_NORMAL       = 1,
         LLAMA_TOKEN_TYPE_UNKNOWN      = 2,
@@ -107,20 +107,6 @@ extern "C" {
         LLAMA_TOKEN_TYPE_BYTE         = 6,
     };
 
-    enum llama_token_attr {
-        LLAMA_TOKEN_ATTR_UNDEFINED    = 0,
-        LLAMA_TOKEN_ATTR_UNKNOWN      = 1 << 0,
-        LLAMA_TOKEN_ATTR_UNUSED       = 1 << 1,
-        LLAMA_TOKEN_ATTR_NORMAL       = 1 << 2,
-        LLAMA_TOKEN_ATTR_CONTROL      = 1 << 3, // SPECIAL?
-        LLAMA_TOKEN_ATTR_USER_DEFINED = 1 << 4,
-        LLAMA_TOKEN_ATTR_BYTE         = 1 << 5,
-        LLAMA_TOKEN_ATTR_NORMALIZED   = 1 << 6,
-        LLAMA_TOKEN_ATTR_LSTRIP       = 1 << 7,
-        LLAMA_TOKEN_ATTR_RSTRIP       = 1 << 8,
-        LLAMA_TOKEN_ATTR_SINGLE_WORD  = 1 << 9,
-    };
-
     // model file types
     enum llama_ftype {
         LLAMA_FTYPE_ALL_F32 = 0,
```
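Since this merge keeps the single-valued `llama_token_type` rather than the bitflag `llama_token_attr`, callers test a token's class with a plain equality comparison instead of a mask. A minimal sketch (the helper name `token_is_byte` is hypothetical):

```c
#include <stdbool.h>
#include "llama.h"

// llama_token_get_type returns exactly one llama_token_type value per
// token, so an equality test is enough. Under the removed bitflag enum,
// the equivalent check would have been a mask test such as
// (attr & LLAMA_TOKEN_ATTR_BYTE) != 0.
static bool token_is_byte(const struct llama_model * model, llama_token token) {
    return llama_token_get_type(model, token) == LLAMA_TOKEN_TYPE_BYTE;
}
```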
```diff
@@ -365,9 +351,6 @@ extern "C" {
         // modifies a preceding LLAMA_GRETYPE_CHAR or
         // LLAMA_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA])
         LLAMA_GRETYPE_CHAR_ALT      = 6,
-
-        // any character (.)
-        LLAMA_GRETYPE_CHAR_ANY      = 7,
     };
 
     typedef struct llama_grammar_element {
```
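As the retained comment notes, `LLAMA_GRETYPE_CHAR_ALT` extends a preceding `LLAMA_GRETYPE_CHAR` with an alternate character. A minimal sketch of how the character class `[ab]` could be encoded as `llama_grammar_element`s (the rule layout here is illustrative):

```c
#include "llama.h"

// One grammar rule matching the character class [ab]:
// a CHAR element, one CHAR_ALT extending it, then the rule terminator.
static const llama_grammar_element rule_ab[] = {
    { LLAMA_GRETYPE_CHAR,     'a' }, // match 'a' ...
    { LLAMA_GRETYPE_CHAR_ALT, 'b' }, // ... or 'b'
    { LLAMA_GRETYPE_END,        0 }, // end of this rule
};
```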
```diff
@@ -430,7 +413,7 @@ extern "C" {
 
     LLAMA_API bool llama_supports_mmap       (void);
     LLAMA_API bool llama_supports_mlock      (void);
-    LLAMA_API bool llama_supports_rpc        (void);
+    LLAMA_API bool llama_supports_rpc        (void); // TMP: https://github.com/ggerganov/llama.cpp/pull/7647#issuecomment-2140234367
     LLAMA_API bool llama_supports_gpu_offload(void);
 
     LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
```
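A minimal sketch of probing these capability flags at startup, so an application can choose a configuration before loading a model (the function name `report_capabilities` is hypothetical):

```c
#include <stdio.h>
#include "llama.h"

static void report_capabilities(void) {
    printf("mmap:        %s\n", llama_supports_mmap()        ? "yes" : "no");
    printf("mlock:       %s\n", llama_supports_mlock()       ? "yes" : "no");
    printf("rpc:         %s\n", llama_supports_rpc()         ? "yes" : "no"); // added by this branch
    printf("gpu offload: %s\n", llama_supports_gpu_offload() ? "yes" : "no");
}
```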
```diff
@@ -442,8 +425,8 @@ extern "C" {
 
     LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
 
-    LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
-    LLAMA_API enum llama_rope_type  llama_rope_type  (const struct llama_model * model);
+    LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type  llama_rope_type  (const struct llama_model * model);
 
     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
```
```diff
@@ -839,7 +822,7 @@ extern "C" {
 
     LLAMA_API float llama_token_get_score(const struct llama_model * model, llama_token token);
 
-    LLAMA_API enum llama_token_attr llama_token_get_attr(const struct llama_model * model, llama_token token);
+    LLAMA_API enum llama_token_type llama_token_get_type(const struct llama_model * model, llama_token token);
 
     // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
     LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);
```
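A minimal sketch combining the restored `llama_token_get_type` with the neighbouring declarations to survey a loaded vocabulary (assumes a `model` obtained from the usual loading APIs):

```c
#include <stdio.h>
#include "llama.h"

// Count byte tokens and end-of-generation tokens in the vocabulary.
static void scan_vocab(const struct llama_model * model) {
    int n_byte = 0;
    int n_eog  = 0;
    for (llama_token t = 0; t < llama_n_vocab(model); t++) {
        if (llama_token_get_type(model, t) == LLAMA_TOKEN_TYPE_BYTE) {
            n_byte++;
        }
        if (llama_token_is_eog(model, t)) {
            n_eog++;
        }
    }
    printf("byte tokens: %d, eog tokens: %d\n", n_byte, n_eog);
}
```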
```diff
@@ -1060,9 +1043,49 @@ extern "C" {
                           llama_token   token);
 
     //
-    // Model split
+    // Beam search
     //
 
+    struct llama_beam_view {
+        const llama_token * tokens;
+
+        size_t n_tokens;
+        float  p;        // Cumulative beam probability (renormalized relative to all beams)
+        bool   eob;      // Callback should set this to true when a beam is at end-of-beam.
+    };
+
+    // Passed to beam_search_callback function.
+    // Whenever 0 < common_prefix_length, this number of tokens should be copied from any of the beams
+    // (e.g. beams[0]) as they will be removed (shifted) from all beams in all subsequent callbacks.
+    // These pointers are valid only during the synchronous callback, so should not be saved.
+    struct llama_beams_state {
+        struct llama_beam_view * beam_views;
+
+        size_t n_beams;              // Number of elements in beam_views[].
+        size_t common_prefix_length; // Current max length of prefix tokens shared by all beams.
+        bool   last_call;            // True iff this is the last callback invocation.
+    };
+
+    // Type of pointer to the beam_search_callback function.
+    // void* callback_data is any custom data passed to llama_beam_search, that is subsequently
+    // passed back to beam_search_callback. This avoids having to use global variables in the callback.
+    typedef void (*llama_beam_search_callback_fn_t)(void * callback_data, struct llama_beams_state);
+
+    /// @details Deterministically returns entire sentence constructed by a beam search.
+    /// @param ctx Pointer to the llama_context.
+    /// @param callback Invoked for each iteration of the beam_search loop, passing in beams_state.
+    /// @param callback_data A pointer that is simply passed back to callback.
+    /// @param n_beams Number of beams to use.
+    /// @param n_past Number of tokens already evaluated.
+    /// @param n_predict Maximum number of tokens to predict. EOS may occur earlier.
+    LLAMA_API void llama_beam_search(
+                    struct llama_context * ctx,
+          llama_beam_search_callback_fn_t   callback,
+                                    void * callback_data,
+                                    size_t   n_beams,
+                                   int32_t   n_past,
+                                   int32_t   n_predict);
+
     /// @details Build a split GGUF final path for this chunk.
     ///          llama_split_path(split_path, sizeof(split_path), "/models/ggml-model-q4_0", 2, 4) => split_path = "/models/ggml-model-q4_0-00002-of-00004.gguf"
     //  Returns the split_path length.
```
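The restored beam-search API is callback-driven: `llama_beam_search` runs the decoding loop and hands the caller a `llama_beams_state` on each iteration; the callback is responsible for flagging finished beams via `eob`, and may consume the common prefix as it becomes final. A minimal sketch, assuming a context whose prompt has already been evaluated (`beam_ctx`, `on_beams`, and the parameter values are illustrative):

```c
#include <stdio.h>
#include "llama.h"

struct beam_ctx {
    const struct llama_model * model;
};

static void on_beams(void * callback_data, struct llama_beams_state state) {
    struct beam_ctx * bc = (struct beam_ctx *) callback_data;

    // Mark beams whose last token ends generation; the search will stop
    // extending them.
    for (size_t i = 0; i < state.n_beams; i++) {
        struct llama_beam_view * bv = &state.beam_views[i];
        if (!bv->eob && bv->n_tokens > 0 &&
            llama_token_is_eog(bc->model, bv->tokens[bv->n_tokens - 1])) {
            bv->eob = true;
        }
    }

    // Tokens shared by all beams are final; they are shifted out of every
    // beam before the next callback, so consume them now.
    if (state.common_prefix_length > 0) {
        const llama_token * prefix = state.beam_views[0].tokens;
        for (size_t i = 0; i < state.common_prefix_length; i++) {
            printf("final token id: %d\n", prefix[i]);
        }
    }
}

// Usage (assuming ctx/model are initialized and n_past prompt tokens
// have already been evaluated):
//   struct beam_ctx bc = { model };
//   llama_beam_search(ctx, on_beams, &bc, /*n_beams=*/4, n_past, /*n_predict=*/64);
```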
