diff --git a/src/lmql/models/lmtp/backends/llama_cpp_model.py b/src/lmql/models/lmtp/backends/llama_cpp_model.py
index 58f3a747..cd4d2b34 100644
--- a/src/lmql/models/lmtp/backends/llama_cpp_model.py
+++ b/src/lmql/models/lmtp/backends/llama_cpp_model.py
@@ -40,9 +40,9 @@ def score(self, input_ids, attention_mask, **model_kwargs):
         self.llm.n_tokens = longest_prefix
         self.llm.eval(tokens)
 
-        scores = np.array([self.llm.scores[j][i] for j,i in enumerate(input_ids[0])])
-        scores = nputil.log_softmax(scores, axis=-1)
-        # print("llama_cpp_model: score() took", time.time() - s, "seconds", file=sys.stderr)
+        logits = np.array(self.llm.scores)
+        logits = nputil.log_softmax(logits, axis=-1)
+        scores = np.array([logits[j][i] for j,i in enumerate(input_ids[0])])
 
         return scores.reshape(1, -1)
 