Skip to content

Commit cd6d98d

Browse files
authored
Merge pull request #1527 from IBM/query_performance_optimizations
Query and code performance optimizations in services
2 parents f66e4a3 + e6b44fa commit cd6d98d

File tree

10 files changed

+639
-405
lines changed

10 files changed

+639
-405
lines changed

.env.example

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,26 +733,41 @@ PROMPT_CACHE_SIZE=100
733733
MAX_PROMPT_SIZE=102400
734734
PROMPT_RENDER_TIMEOUT=10
735735

736+
#####################################
737+
# MCP Server Health Check Configurations
738+
#####################################
739+
736740
# Health Check Configuration
737741
HEALTH_CHECK_INTERVAL=60
738742
# Health check timeout in seconds (default: 10, matches config.py)
739743
HEALTH_CHECK_TIMEOUT=10
740744
UNHEALTHY_THRESHOLD=5
741745
# Gateway URL validation timeout in seconds (default: 5, matches config.py)
742746
GATEWAY_VALIDATION_TIMEOUT=5
747+
# Maximum number of concurrent health checks per worker. Prevents resource exhaustion during health check operations (default: 20, matches config.py)
748+
MAX_CONCURRENT_HEALTH_CHECKS=20
743749

744750
# File lock name for gateway service leader election
745751
# Used to coordinate multiple gateway instances when running in cluster mode
746752
# Default: "gateway_service_leader.lock"
747753
FILELOCK_NAME=gateway_service_leader.lock
748754

755+
756+
#####################################
757+
# Default Root Paths
758+
#####################################
759+
749760
# Default root paths (JSON array)
750761
# List of default root paths for resource resolution
751762
# Example: ["/api/v1", "/mcp"]
752763
# Default: []
753764
DEFAULT_ROOTS=[]
754765

766+
767+
#####################################
755768
# OpenTelemetry Observability Configuration
769+
#####################################
770+
756771
# Enable distributed tracing and metrics collection
757772
# Options: true (default), false
758773
OTEL_ENABLE_OBSERVABILITY=true

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1996,6 +1996,7 @@ ENABLE_METRICS=false
19961996
| `UNHEALTHY_THRESHOLD` | Fail-count before peer deactivation, | `3` | int > 0 |
19971997
| | Set to -1 if deactivation is not needed. | | |
19981998
| `GATEWAY_VALIDATION_TIMEOUT` | Gateway URL validation timeout (secs) | `5` | int > 0 |
1999+
| `MAX_CONCURRENT_HEALTH_CHECKS` | Max concurrent health checks | `20` | int > 0 |
19992000
| `FILELOCK_NAME` | File lock for leader election | `gateway_service_leader.lock` | string |
20002001
| `DEFAULT_ROOTS` | Default root paths for resources | `[]` | JSON array |
20012002

mcpgateway/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,7 @@ def parse_issuers(cls, v: Any) -> list[str]:
925925
health_check_interval: int = 60 # seconds
926926
health_check_timeout: int = 10 # seconds
927927
unhealthy_threshold: int = 5 # after this many failures, mark as Offline
928+
max_concurrent_health_checks: int = 20 # maximum concurrent health checks per worker
928929

929930
# Validation Gateway URL
930931
gateway_validation_timeout: int = 5 # seconds

mcpgateway/services/gateway_service.py

Lines changed: 250 additions & 172 deletions
Large diffs are not rendered by default.

mcpgateway/services/server_service.py

Lines changed: 156 additions & 70 deletions
Large diffs are not rendered by default.

mcpgateway/services/tool_service.py

Lines changed: 90 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -307,30 +307,27 @@ async def get_top_tools(self, db: Session, limit: Optional[int] = 5) -> List[Top
307307
- success_rate: Success rate percentage, or None if no metrics.
308308
- last_execution: Timestamp of the last execution, or None if no metrics.
309309
"""
310+
311+
success_rate = case(
312+
(func.count(ToolMetric.id) > 0, func.sum(case((ToolMetric.is_success.is_(True), 1), else_=0)).cast(Float) * 100 / func.count(ToolMetric.id)), else_=None # pylint: disable=not-callable
313+
)
314+
310315
query = (
311-
db.query(
316+
select(
312317
DbTool.id,
313318
DbTool.name,
314319
func.count(ToolMetric.id).label("execution_count"), # pylint: disable=not-callable
315-
func.avg(ToolMetric.response_time).label("avg_response_time"), # pylint: disable=not-callable
316-
case(
317-
(
318-
func.count(ToolMetric.id) > 0, # pylint: disable=not-callable
319-
func.sum(case((ToolMetric.is_success.is_(True), 1), else_=0)).cast(Float) / func.count(ToolMetric.id) * 100, # pylint: disable=not-callable
320-
),
321-
else_=None,
322-
).label("success_rate"),
323-
func.max(ToolMetric.timestamp).label("last_execution"), # pylint: disable=not-callable
320+
func.avg(ToolMetric.response_time).label("avg_response_time"),
321+
success_rate.label("success_rate"),
322+
func.max(ToolMetric.timestamp).label("last_execution"),
324323
)
325-
.outerjoin(ToolMetric)
324+
.outerjoin(ToolMetric, ToolMetric.tool_id == DbTool.id)
326325
.group_by(DbTool.id, DbTool.name)
327326
.order_by(desc("execution_count"))
327+
.limit(limit or 5)
328328
)
329329

330-
if limit is not None:
331-
query = query.limit(limit)
332-
333-
results = query.all()
330+
results = db.execute(query).all()
334331

335332
return build_top_performers(results)
336333

@@ -363,36 +360,38 @@ def _convert_tool_to_read(self, tool: DbTool, include_metrics: bool = True) -> T
363360
tool_dict = tool.__dict__.copy()
364361
tool_dict.pop("_sa_instance_state", None)
365362

366-
if include_metrics:
367-
tool_dict["metrics"] = tool.metrics_summary
368-
else:
369-
tool_dict["metrics"] = None
370-
371363
tool_dict["execution_count"] = tool.execution_count
364+
tool_dict["metrics"] = tool.metrics_summary if include_metrics else None
372365

373366
tool_dict["request_type"] = tool.request_type
374367
tool_dict["annotations"] = tool.annotations or {}
375368

376-
decoded_auth_value = decode_auth(tool.auth_value)
377-
if tool.auth_type == "basic":
378-
decoded_bytes = base64.b64decode(decoded_auth_value["Authorization"].split("Basic ")[1])
379-
username, password = decoded_bytes.decode("utf-8").split(":")
380-
tool_dict["auth"] = {
381-
"auth_type": "basic",
382-
"username": username,
383-
"password": "********" if password else None,
384-
}
385-
elif tool.auth_type == "bearer":
386-
tool_dict["auth"] = {
387-
"auth_type": "bearer",
388-
"token": "********" if decoded_auth_value["Authorization"] else None,
389-
}
390-
elif tool.auth_type == "authheaders":
391-
tool_dict["auth"] = {
392-
"auth_type": "authheaders",
393-
"auth_header_key": next(iter(decoded_auth_value)),
394-
"auth_header_value": "********" if decoded_auth_value[next(iter(decoded_auth_value))] else None,
395-
}
369+
# Only decode auth if auth_type is set
370+
if tool.auth_type and tool.auth_value:
371+
decoded_auth_value = decode_auth(tool.auth_value)
372+
if tool.auth_type == "basic":
373+
decoded_bytes = base64.b64decode(decoded_auth_value["Authorization"].split("Basic ")[1])
374+
username, password = decoded_bytes.decode("utf-8").split(":")
375+
tool_dict["auth"] = {
376+
"auth_type": "basic",
377+
"username": username,
378+
"password": "********" if password else None,
379+
}
380+
elif tool.auth_type == "bearer":
381+
tool_dict["auth"] = {
382+
"auth_type": "bearer",
383+
"token": "********" if decoded_auth_value["Authorization"] else None,
384+
}
385+
elif tool.auth_type == "authheaders":
386+
# Get first key
387+
first_key = next(iter(decoded_auth_value))
388+
tool_dict["auth"] = {
389+
"auth_type": "authheaders",
390+
"auth_header_key": first_key,
391+
"auth_header_value": "********" if decoded_auth_value[first_key] else None,
392+
}
393+
else:
394+
tool_dict["auth"] = None
396395
else:
397396
tool_dict["auth"] = None
398397

@@ -791,10 +790,17 @@ async def list_tools(
791790
if has_more:
792791
tools = tools[:page_size] # Trim to page_size
793792

793+
# Batch fetch team names for all tools at once
794+
team_ids = {getattr(t, "team_id", None) for t in tools if getattr(t, "team_id", None)}
795+
team_name_map = {}
796+
if team_ids:
797+
teams = db.query(EmailTeam.id, EmailTeam.name).filter(EmailTeam.id.in_(team_ids), EmailTeam.is_active.is_(True)).all()
798+
team_name_map = {team.id: team.name for team in teams}
799+
794800
# Convert to ToolRead objects
795801
result = []
796802
for t in tools:
797-
team_name = self._get_team_name(db, getattr(t, "team_id", None))
803+
team_name = team_name_map.get(getattr(t, "team_id", None))
798804
t.team = team_name
799805
result.append(self._convert_tool_to_read(t))
800806

@@ -944,9 +950,17 @@ async def list_tools_for_user(
944950
# query = query.offset(skip).limit(limit)
945951

946952
tools = db.execute(query).scalars().all()
953+
954+
# Batch fetch team names for all tools at once
955+
tool_team_ids = {getattr(t, "team_id", None) for t in tools if getattr(t, "team_id", None)}
956+
team_name_map = {}
957+
if tool_team_ids:
958+
teams = db.query(EmailTeam.id, EmailTeam.name).filter(EmailTeam.id.in_(tool_team_ids), EmailTeam.is_active.is_(True)).all()
959+
team_name_map = {team.id: team.name for team in teams}
960+
947961
result = []
948962
for t in tools:
949-
team_name = self._get_team_name(db, getattr(t, "team_id", None))
963+
team_name = team_name_map.get(getattr(t, "team_id", None))
950964
t.team = team_name
951965
result.append(self._convert_tool_to_read(t))
952966
return result
@@ -1876,31 +1890,53 @@ async def aggregate_metrics(self, db: Session) -> Dict[str, Any]:
18761890
>>> from unittest.mock import MagicMock
18771891
>>> service = ToolService()
18781892
>>> db = MagicMock()
1879-
>>> db.execute.return_value.scalar.return_value = 0
1893+
>>> # Mock the row result object returned by db.execute().one()
1894+
>>> mock_result_row = MagicMock()
1895+
>>> mock_result_row.total = 10
1896+
>>> mock_result_row.successful = 8
1897+
>>> mock_result_row.failed = 2
1898+
>>> mock_result_row.min_rt = 50.0
1899+
>>> mock_result_row.max_rt = 250.0
1900+
>>> mock_result_row.avg_rt = 150.0
1901+
>>> mock_result_row.last_time = "2023-01-01T12:00:00"
1902+
>>> db.execute.return_value.one.return_value = mock_result_row
18801903
>>> import asyncio
18811904
>>> result = asyncio.run(service.aggregate_metrics(db))
18821905
>>> isinstance(result, dict)
18831906
True
1907+
>>> result['total_executions']
1908+
10
1909+
>>> result['failure_rate']
1910+
0.2
18841911
"""
18851912

1886-
total = db.execute(select(func.count(ToolMetric.id))).scalar() or 0 # pylint: disable=not-callable
1887-
successful = db.execute(select(func.count(ToolMetric.id)).where(ToolMetric.is_success.is_(True))).scalar() or 0 # pylint: disable=not-callable
1888-
failed = db.execute(select(func.count(ToolMetric.id)).where(ToolMetric.is_success.is_(False))).scalar() or 0 # pylint: disable=not-callable
1913+
# Query to get all aggregated metrics at once
1914+
result = db.execute(
1915+
select(
1916+
func.count(ToolMetric.id).label("total"), # pylint: disable=not-callable
1917+
func.sum(case((ToolMetric.is_success.is_(True), 1), else_=0)).label("successful"), # pylint: disable=not-callable
1918+
func.sum(case((ToolMetric.is_success.is_(False), 1), else_=0)).label("failed"), # pylint: disable=not-callable
1919+
func.min(ToolMetric.response_time).label("min_rt"), # pylint: disable=not-callable
1920+
func.max(ToolMetric.response_time).label("max_rt"), # pylint: disable=not-callable
1921+
func.avg(ToolMetric.response_time).label("avg_rt"), # pylint: disable=not-callable
1922+
func.max(ToolMetric.timestamp).label("last_time"), # pylint: disable=not-callable
1923+
)
1924+
).one()
1925+
1926+
total = result.total or 0
1927+
successful = result.successful or 0
1928+
failed = result.failed or 0
18891929
failure_rate = failed / total if total > 0 else 0.0
1890-
min_rt = db.execute(select(func.min(ToolMetric.response_time))).scalar()
1891-
max_rt = db.execute(select(func.max(ToolMetric.response_time))).scalar()
1892-
avg_rt = db.execute(select(func.avg(ToolMetric.response_time))).scalar()
1893-
last_time = db.execute(select(func.max(ToolMetric.timestamp))).scalar()
18941930

18951931
return {
18961932
"total_executions": total,
18971933
"successful_executions": successful,
18981934
"failed_executions": failed,
18991935
"failure_rate": failure_rate,
1900-
"min_response_time": min_rt,
1901-
"max_response_time": max_rt,
1902-
"avg_response_time": avg_rt,
1903-
"last_execution_time": last_time,
1936+
"min_response_time": result.min_rt,
1937+
"max_response_time": result.max_rt,
1938+
"avg_response_time": result.avg_rt,
1939+
"last_execution_time": result.last_time,
19041940
}
19051941

19061942
async def reset_metrics(self, db: Session, tool_id: Optional[int] = None) -> None:

mcpgateway/static/admin.js

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22839,17 +22839,22 @@ function initializeRealTimeMonitoring() {
2283922839

2284022840
// --- Gateway Events ---
2284122841
// Handlers for specific states
22842-
// eventSource.addEventListener("gateway_activated", (e) => handleEntityEvent("gateway", e));
22842+
2284322843
// eventSource.addEventListener("gateway_deactivated", (e) => handleEntityEvent("gateway", e));
22844+
eventSource.addEventListener("gateway_activated", (e) =>
22845+
handleEntityEvent("gateway", e),
22846+
);
2284422847
eventSource.addEventListener("gateway_offline", (e) =>
2284522848
handleEntityEvent("gateway", e),
2284622849
);
2284722850

2284822851
// --- Tool Events ---
2284922852
// Handlers for specific states
2285022853

22851-
// eventSource.addEventListener("tool_activated", (e) => handleEntityEvent("tool", e));
2285222854
// eventSource.addEventListener("tool_deactivated", (e) => handleEntityEvent("tool", e));
22855+
eventSource.addEventListener("tool_activated", (e) =>
22856+
handleEntityEvent("tool", e),
22857+
);
2285322858
eventSource.addEventListener("tool_offline", (e) =>
2285422859
handleEntityEvent("tool", e),
2285522860
);

0 commit comments

Comments
 (0)