diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index 56fdae48a3ce..09496dd39883 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,5 +1,5 @@
-LLAMA_VERSION?=52ab19df633f3de5d4db171a16f2d9edd2342fec
+LLAMA_VERSION?=0e1ccf15c7b6d05c720551b537857ecf6194d420
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
 
 CMAKE_ARGS?=
diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index e08413d0cf58..dca3e3ae2ef2 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -78,9 +78,6 @@ static void start_llama_server(server_context& ctx_server) {
         std::this_thread::sleep_for(std::chrono::milliseconds(100));
     }
 
-    ctx_server.init();
-    //state.store(SERVER_STATE_READY);
-
     LOG_INF("%s: model loaded\n", __func__);
 
     // print sample chat example to make it clear which template is used