diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp index c91ce854..a2e39a9c 100644 --- a/backend/cpp/llama/grpc-server.cpp +++ b/backend/cpp/llama/grpc-server.cpp @@ -1084,7 +1084,7 @@ struct llama_server_context slot.has_next_token = false; } - if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model)) + if (result.tok == llama_token_eos(model)) { slot.stopped_eos = true; slot.has_next_token = false;