diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py index 95e6fd462..899f20e8f 100644 --- a/llms/mlx_lm/utils.py +++ b/llms/mlx_lm/utils.py @@ -375,6 +375,8 @@ def generate( break if is_batch: output_toks.append(tokens) + if verbose: + print(".", end="", flush=True) else: token = tokens.item() logprobs = logprobs.squeeze(0) @@ -404,6 +406,7 @@ def generate( if token_count <= 0: print("No tokens generated for this prompt") if is_batch: + print() for p, resp in zip(prompt, response): print("=" * 10) print("Prompt:", p)