diff --git a/core/cli/run.go b/core/cli/run.go index 9b46b570133e..ba8a677ee3f7 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -77,6 +77,8 @@ type RunCMD struct { DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` MachineTag string `env:"LOCALAI_MACHINE_TAG,MACHINE_TAG" help:"Add Machine-Tag header to each response which is useful to track the machine in the P2P network" group:"api"` LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"` + EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"` + TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"` AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"` Version bool @@ -149,6 +151,11 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { opts = append(opts, config.DisableRuntimeSettings) } + if r.EnableTracing { + opts = append(opts, config.EnableTracing) + } + opts = append(opts, config.WithTracingMaxItems(r.TracingMaxItems)) + token := "" if r.Peer2Peer || r.Peer2PeerToken != "" { xlog.Info("P2P mode enabled") diff --git a/core/config/application_config.go b/core/config/application_config.go index 07b8b7682ceb..4d393c7d5dc2 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -19,6 +19,8 @@ type ApplicationConfig struct { UploadLimitMB, Threads, ContextSize int F16 bool Debug bool + EnableTracing bool + TracingMaxItems int GeneratedContentDir string UploadDir string @@ -89,6 +91,7 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig { Context: context.Background(), UploadLimitMB:
15, Debug: true, + TracingMaxItems: 1024, AgentJobRetentionDays: 30, // Default: 30 days PathWithoutAuth: []string{ "/static/", @@ -158,6 +161,10 @@ var EnableWatchDog = func(o *ApplicationConfig) { o.WatchDog = true } +var EnableTracing = func(o *ApplicationConfig) { + o.EnableTracing = true +} + var EnableWatchDogIdleCheck = func(o *ApplicationConfig) { o.WatchDog = true o.WatchDogIdle = true @@ -386,6 +393,12 @@ func WithDebug(debug bool) AppOption { } } +func WithTracingMaxItems(items int) AppOption { + return func(o *ApplicationConfig) { + o.TracingMaxItems = items + } +} + func WithGeneratedContentDir(generatedContentDir string) AppOption { return func(o *ApplicationConfig) { o.GeneratedContentDir = generatedContentDir @@ -509,6 +522,8 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings { contextSize := o.ContextSize f16 := o.F16 debug := o.Debug + tracingMaxItems := o.TracingMaxItems + enableTracing := o.EnableTracing cors := o.CORS csrf := o.CSRF corsAllowOrigins := o.CORSAllowOrigins @@ -556,6 +571,8 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings { ContextSize: &contextSize, F16: &f16, Debug: &debug, + TracingMaxItems: &tracingMaxItems, + EnableTracing: &enableTracing, CORS: &cors, CSRF: &csrf, CORSAllowOrigins: &corsAllowOrigins, @@ -656,6 +673,12 @@ func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (req if settings.Debug != nil { o.Debug = *settings.Debug } + if settings.EnableTracing != nil { + o.EnableTracing = *settings.EnableTracing + } + if settings.TracingMaxItems != nil { + o.TracingMaxItems = *settings.TracingMaxItems + } if settings.CORS != nil { o.CORS = *settings.CORS } diff --git a/core/config/runtime_settings.go b/core/config/runtime_settings.go index c02d4fcd7c20..01c6f383d703 100644 --- a/core/config/runtime_settings.go +++ b/core/config/runtime_settings.go @@ -27,10 +27,12 @@ type RuntimeSettings struct { MemoryReclaimerThreshold *float64 
`json:"memory_reclaimer_threshold,omitempty"` // Threshold 0.0-1.0 (e.g., 0.95 = 95%) // Performance settings - Threads *int `json:"threads,omitempty"` - ContextSize *int `json:"context_size,omitempty"` - F16 *bool `json:"f16,omitempty"` - Debug *bool `json:"debug,omitempty"` + Threads *int `json:"threads,omitempty"` + ContextSize *int `json:"context_size,omitempty"` + F16 *bool `json:"f16,omitempty"` + Debug *bool `json:"debug,omitempty"` + EnableTracing *bool `json:"enable_tracing,omitempty"` + TracingMaxItems *int `json:"tracing_max_items,omitempty"` // Security/CORS settings CORS *bool `json:"cors,omitempty"` diff --git a/core/http/middleware/trace.go b/core/http/middleware/trace.go new file mode 100644 index 000000000000..aa63ba349f37 --- /dev/null +++ b/core/http/middleware/trace.go @@ -0,0 +1,156 @@ +package middleware + +import ( + "bytes" + "github.com/emirpasic/gods/v2/queues/circularbuffer" + "io" + "net/http" + "sort" + "sync" + "time" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/application" + "github.com/mudler/xlog" +) + +type APIExchangeRequest struct { + Method string `json:"method"` + Path string `json:"path"` + Headers *http.Header `json:"headers"` + Body *[]byte `json:"body"` +} + +type APIExchangeResponse struct { + Status int `json:"status"` + Headers *http.Header `json:"headers"` + Body *[]byte `json:"body"` +} + +type APIExchange struct { + Timestamp time.Time `json:"timestamp"` + Request APIExchangeRequest `json:"request"` + Response APIExchangeResponse `json:"response"` +} + +var traceBuffer *circularbuffer.Queue[APIExchange] +var mu sync.Mutex +var logChan = make(chan APIExchange, 100) + +type bodyWriter struct { + http.ResponseWriter + body *bytes.Buffer +} + +func (w *bodyWriter) Write(b []byte) (int, error) { + w.body.Write(b) + return w.ResponseWriter.Write(b) +} + +func (w *bodyWriter) Flush() { + if flusher, ok := w.ResponseWriter.(http.Flusher); ok { + flusher.Flush() + } +} + +// TraceMiddleware intercepts 
and logs JSON API requests and responses +func TraceMiddleware(app *application.Application) echo.MiddlewareFunc { + if app.ApplicationConfig().EnableTracing && traceBuffer == nil { + traceBuffer = circularbuffer.New[APIExchange](app.ApplicationConfig().TracingMaxItems) + + go func() { + for exchange := range logChan { + mu.Lock() + traceBuffer.Enqueue(exchange) + mu.Unlock() + } + }() + } + + return func(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + if !app.ApplicationConfig().EnableTracing { + return next(c) + } + + if c.Request().Header.Get("Content-Type") != "application/json" { + return next(c) + } + + body, err := io.ReadAll(c.Request().Body) + if err != nil { + xlog.Error("Failed to read request body") + return err + } + + // Restore the body for downstream handlers + c.Request().Body = io.NopCloser(bytes.NewBuffer(body)) + + startTime := time.Now() + + // Wrap response writer to capture body + resBody := new(bytes.Buffer) + mw := &bodyWriter{ + ResponseWriter: c.Response().Writer, + body: resBody, + } + c.Response().Writer = mw + + err = next(c) + if err != nil { + c.Response().Writer = mw.ResponseWriter // Restore original writer if error + return err + } + + // Create exchange log + requestHeaders := c.Request().Header.Clone() + requestBody := make([]byte, len(body)) + copy(requestBody, body) + responseHeaders := c.Response().Header().Clone() + responseBody := make([]byte, resBody.Len()) + copy(responseBody, resBody.Bytes()) + exchange := APIExchange{ + Timestamp: startTime, + Request: APIExchangeRequest{ + Method: c.Request().Method, + Path: c.Path(), + Headers: &requestHeaders, + Body: &requestBody, + }, + Response: APIExchangeResponse{ + Status: c.Response().Status, + Headers: &responseHeaders, + Body: &responseBody, + }, + } + + select { + case logChan <- exchange: + default: + xlog.Warn("Trace channel full, dropping trace") + } + + return nil + } + } +} + +// GetTraces returns a copy of the logged API exchanges for 
display +func GetTraces() []APIExchange { + mu.Lock() + defer mu.Unlock() + // Guard: traceBuffer is only initialized when tracing is enabled; + // without this check a call with tracing disabled panics on nil. + if traceBuffer == nil { + return nil + } + traces := traceBuffer.Values() + sort.Slice(traces, func(i, j int) bool { + return traces[i].Timestamp.Before(traces[j].Timestamp) + }) + + return traces +} + +// ClearTraces clears the in-memory logs +func ClearTraces() { + mu.Lock() + defer mu.Unlock() + if traceBuffer != nil { + traceBuffer.Clear() + } +} diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go index 921582a6b54b..93fed71dbb59 100644 --- a/core/http/routes/openai.go +++ b/core/http/routes/openai.go @@ -14,16 +14,18 @@ func RegisterOpenAIRoutes(app *echo.Echo, re *middleware.RequestExtractor, application *application.Application) { // openAI compatible API endpoint + traceMiddleware := middleware.TraceMiddleware(application) // realtime // TODO: Modify/disable the API key middleware for this endpoint to allow ephemeral keys created by sessions app.GET("/v1/realtime", openai.Realtime(application)) - app.POST("/v1/realtime/sessions", openai.RealtimeTranscriptionSession(application)) - app.POST("/v1/realtime/transcription_session", openai.RealtimeTranscriptionSession(application)) + app.POST("/v1/realtime/sessions", openai.RealtimeTranscriptionSession(application), traceMiddleware) + app.POST("/v1/realtime/transcription_session", openai.RealtimeTranscriptionSession(application), traceMiddleware) // chat chatHandler := openai.ChatEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()) chatMiddleware := []echo.MiddlewareFunc{ + traceMiddleware, re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)), re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }), func(next echo.HandlerFunc) echo.HandlerFunc { @@ -41,6 +43,7 @@ func RegisterOpenAIRoutes(app *echo.Echo, // edit editHandler := openai.EditEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(),
application.ApplicationConfig()) editMiddleware := []echo.MiddlewareFunc{ + traceMiddleware, re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EDIT)), re.BuildConstantDefaultModelNameMiddleware("gpt-4o"), re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }), @@ -59,6 +62,7 @@ func RegisterOpenAIRoutes(app *echo.Echo, // completion completionHandler := openai.CompletionEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()) completionMiddleware := []echo.MiddlewareFunc{ + traceMiddleware, re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_COMPLETION)), re.BuildConstantDefaultModelNameMiddleware("gpt-4o"), re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }), @@ -78,6 +82,7 @@ func RegisterOpenAIRoutes(app *echo.Echo, // MCPcompletion mcpCompletionHandler := openai.MCPCompletionEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig()) mcpCompletionMiddleware := []echo.MiddlewareFunc{ + traceMiddleware, re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)), re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }), func(next echo.HandlerFunc) echo.HandlerFunc { @@ -95,6 +100,7 @@ func RegisterOpenAIRoutes(app *echo.Echo, // embeddings embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig()) embeddingMiddleware := []echo.MiddlewareFunc{ + traceMiddleware, re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_EMBEDDINGS)), re.BuildConstantDefaultModelNameMiddleware("gpt-4o"), re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }), @@ -113,6 +119,7 @@ func 
RegisterOpenAIRoutes(app *echo.Echo, audioHandler := openai.TranscriptEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig()) audioMiddleware := []echo.MiddlewareFunc{ + traceMiddleware, re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TRANSCRIPT)), re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }), func(next echo.HandlerFunc) echo.HandlerFunc { @@ -130,6 +137,7 @@ func RegisterOpenAIRoutes(app *echo.Echo, audioSpeechHandler := localai.TTSEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig()) audioSpeechMiddleware := []echo.MiddlewareFunc{ + traceMiddleware, re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TTS)), re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TTSRequest) }), } @@ -140,6 +148,7 @@ func RegisterOpenAIRoutes(app *echo.Echo, // images imageHandler := openai.ImageEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig()) imageMiddleware := []echo.MiddlewareFunc{ + traceMiddleware, // Default: use the first available image generation model re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_IMAGE)), re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }), @@ -164,6 +173,7 @@ func RegisterOpenAIRoutes(app *echo.Echo, // videos (OpenAI-compatible endpoints mapped to LocalAI video handler) videoHandler := openai.VideoEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig()) videoMiddleware := []echo.MiddlewareFunc{ + traceMiddleware, re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_VIDEO)), re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }), func(next echo.HandlerFunc) echo.HandlerFunc { diff --git a/core/http/routes/ui.go 
b/core/http/routes/ui.go index 4b852875fa77..da6f5d1ee7f5 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -317,4 +317,15 @@ func RegisterUIRoutes(app *echo.Echo, // Render index return c.Render(200, "views/tts", summary) }) + + // Traces UI (the JSON API for traces lives in RegisterUIAPIRoutes, + // which also enforces the EnableTracing check — do not register it here too) + app.GET("/traces", func(c echo.Context) error { + summary := map[string]interface{}{ + "Title": "LocalAI - Traces", + "BaseURL": middleware.BaseURL(c), + "Version": internal.PrintableVersion(), + } + return c.Render(200, "views/traces", summary) + }) + } diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 36194d5c06c4..84af2e32fe57 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -16,6 +16,7 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/http/endpoints/localai" + "github.com/mudler/LocalAI/core/http/middleware" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/model" @@ -947,4 +948,24 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance)) app.POST("/api/settings", localai.UpdateSettingsEndpoint(applicationInstance)) } + + // Traces API + app.GET("/api/traces", func(c echo.Context) error { + if !appConfig.EnableTracing { + return c.JSON(503, map[string]any{ + "error": "Tracing disabled", + }) + } + traces := middleware.GetTraces() + return c.JSON(200, map[string]interface{}{ + "traces": traces, + }) + }) + + app.POST("/api/traces/clear", func(c echo.Context) error { + middleware.ClearTraces() + return c.JSON(200, map[string]interface{}{ + "message": "Traces cleared", + }) + }) } diff --git
a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html index 2afdfa36e13f..cbc092c41a04 100644 --- a/core/http/views/partials/navbar.html +++ b/core/http/views/partials/navbar.html @@ -37,6 +37,9 @@ Agent Jobs + + Traces + API @@ -94,6 +97,9 @@ Agent Jobs + + Traces + API diff --git a/core/http/views/settings.html b/core/http/views/settings.html index c960862a07c5..b06c85a6d965 100644 --- a/core/http/views/settings.html +++ b/core/http/views/settings.html @@ -10,7 +10,7 @@
Time before an idle backend is stopped (e.g., 15m, 1h)
- Automatically stop backends that are busy for too long (stuck processes)Time before a busy backend is stopped (e.g., 5m, 30m)
- Check IntervalHow often the watchdog checks backends and memory usage (e.g., 2s, 30s)
-When memory usage exceeds this, backends will be evicted (50-100%)
Maximum number of models to keep loaded at once (0 = unlimited, 1 = single backend mode). Least recently used models are evicted when limit is reached.
- Enable backends to handle multiple requests in parallel (if supported) @@ -263,7 +263,7 @@Number of threads to use for model inference (0 = auto)
- @@ -273,7 +273,7 @@Default context window size for models
- @@ -286,7 +286,7 @@Enable tracing of requests and responses
+Maximum number of tracing items to keep
+ +Comma-separated list of allowed origins
- @@ -347,7 +373,7 @@Authentication token for P2P network (set to 0 to generate a new token)
- @@ -379,7 +405,7 @@Network identifier for P2P connections
- @@ -391,7 +417,7 @@Number of days to keep job history (default: 30)
- @@ -437,7 +463,7 @@List of API keys (one per line or comma-separated)
- @@ -464,7 +490,7 @@Array of gallery objects with 'url' and 'name' fields
- @@ -497,7 +523,7 @@Array of backend gallery objects with 'url' and 'name' fields
- @@ -521,7 +547,7 @@