diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index 56fdae48a3ce..09496dd39883 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=52ab19df633f3de5d4db171a16f2d9edd2342fec
+LLAMA_VERSION?=0e1ccf15c7b6d05c720551b537857ecf6194d420
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index e08413d0cf58..dca3e3ae2ef2 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -78,9 +78,6 @@ static void start_llama_server(server_context& ctx_server) {
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
 
-    ctx_server.init();
-    //state.store(SERVER_STATE_READY);
-
     LOG_INF("%s: model loaded\n", __func__);
 
     // print sample chat example to make it clear which template is used