From 2073a295a7700054cf70cf905ca435406b8b6be4 Mon Sep 17 00:00:00 2001
From: Joel Niklaus
Date: Tue, 7 Jan 2025 00:23:41 -0800
Subject: [PATCH] Fix issue with encodings for together models. (#483)

---
 src/lighteval/models/litellm_model.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/lighteval/models/litellm_model.py b/src/lighteval/models/litellm_model.py
index 21dfc45a..9e29f569 100644
--- a/src/lighteval/models/litellm_model.py
+++ b/src/lighteval/models/litellm_model.py
@@ -255,12 +255,18 @@ def greedy_until(
     def tokenizer(self):
         return self._tokenizer
 
+    def _encode(self, text: str):
+        enc = encode(model=self.model, text=text)
+        if hasattr(enc, "ids"):
+            return enc.ids
+        return enc
+
     def tok_encode(self, text: str | list[str]):
         if isinstance(text, list):
-            toks = [encode(model=self.model, text=t["content"]) for t in text]
+            toks = [self._encode(t["content"]) for t in text]
             toks = [tok for tok in toks if tok]
             return toks
-        return encode(model=self.model, text=text)
+        return self._encode(text)
 
     @property
     def add_special_tokens(self) -> bool: