From e3b7766a4ac74e2f1187e9f0743c780182e5217e Mon Sep 17 00:00:00 2001
From: Luca Beurer-Kellner
Date: Thu, 27 Jul 2023 11:13:53 +0200
Subject: [PATCH] fix llama score normalization

---
 src/lmql/models/lmtp/backends/llama_cpp_model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/lmql/models/lmtp/backends/llama_cpp_model.py b/src/lmql/models/lmtp/backends/llama_cpp_model.py
index 58f3a747..cd4d2b34 100644
--- a/src/lmql/models/lmtp/backends/llama_cpp_model.py
+++ b/src/lmql/models/lmtp/backends/llama_cpp_model.py
@@ -40,9 +40,9 @@ def score(self, input_ids, attention_mask, **model_kwargs):
         self.llm.n_tokens = longest_prefix
         self.llm.eval(tokens)
 
-        scores = np.array([self.llm.scores[j][i] for j,i in enumerate(input_ids[0])])
-        scores = nputil.log_softmax(scores, axis=-1)
-        # print("llama_cpp_model: score() took", time.time() - s, "seconds", file=sys.stderr)
+        logits = np.array(self.llm.scores)
+        logits = nputil.log_softmax(logits, axis=-1)
+        scores = np.array([logits[j][i] for j,i in enumerate(input_ids[0])])
 
         return scores.reshape(1, -1)
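
Note on the fix: the old code gathered one logit per position first and only then applied `log_softmax`, so the normalization ran across the sequence positions rather than across the vocabulary, producing values that are not per-token log-probabilities. The patch normalizes the full `(seq_len, vocab_size)` logit matrix over the vocabulary axis first, then gathers the entry for each input token. Below is a minimal, self-contained sketch (not part of the patch) that reproduces the difference; the shapes, random values, and the local `log_softmax` helper standing in for `nputil.log_softmax` are all hypothetical.

```python
import numpy as np

def log_softmax(x, axis=-1):
    # numerically stable log-softmax, a stand-in for nputil.log_softmax
    x_max = np.max(x, axis=axis, keepdims=True)
    shifted = x - x_max
    return shifted - np.log(np.sum(np.exp(shifted), axis=axis, keepdims=True))

rng = np.random.default_rng(0)
seq_len, vocab_size = 4, 8                          # hypothetical dimensions
logits = rng.normal(size=(seq_len, vocab_size))     # stand-in for self.llm.scores
input_ids = rng.integers(0, vocab_size, size=(1, seq_len))

# Buggy order: gather one logit per position, then normalize.
# log_softmax now runs over the sequence axis, so the result is a
# distribution over positions, not per-token log-probabilities.
gathered = np.array([logits[j][i] for j, i in enumerate(input_ids[0])])
buggy = log_softmax(gathered, axis=-1)

# Fixed order (what the patch does): normalize each position's logits
# over the vocabulary axis, then gather the token-specific entries.
normalized = log_softmax(logits, axis=-1)
correct = np.array([normalized[j][i] for j, i in enumerate(input_ids[0])])

print("buggy  :", buggy)    # exp(buggy) sums to 1 across positions
print("correct:", correct)  # true per-position token log-probabilities
```

A quick check: `np.exp(buggy).sum()` comes out to 1.0 regardless of the vocabulary, which is exactly the symptom of normalizing over the wrong axis, while each row of `np.exp(normalized)` sums to 1 as a proper distribution over tokens.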