Skip to content

Commit d2e41e0

Browse files
authored
[CONTINT-4898] Add container metric support for any CRI compliant runtime (#43317)
### What does this PR do?

Adds container metric support for any CRI-compliant runtime provided as a `cri_socket_path`.

### Motivation

Despite allowing custom `cri_socket_path` values, we actually only support `containerd` or `crio`. We want to support metric collection for any runtime if it is CRI-compliant.

### Describe how you validated your changes

1. Stand up an environment running a nonstandard runtime (for example, pouch)

```bash
$ kubectl get nodes -o wide
NAME       STATUS   ROLES           AGE    VERSION    INTERNAL-IP     EXTERNAL-IP   OS-IMAGE             KERNEL-VERSION     CONTAINER-RUNTIME
pouch-qa   Ready    control-plane   5d2h   v1.25.16   192.168.64.50   <none>        Ubuntu 24.04.3 LTS   6.8.0-87-generic   pouch://1.3.1-ba854f2d
```

2. Deploy an agent built off of this feature branch and specify the custom socket

```yaml
datadog:
  ...
  criSocketPath: /var/run/pouchcri.sock
agents:
  ...
  volumes:
    - name: pouchcri-sock
      hostPath:
        path: /var/run/pouchcri.sock # path on the node
        type: Socket
  volumeMounts:
    - name: pouchcri-sock
      mountPath: /var/run/pouchcri.sock
      readOnly: true
```

3. Exec into the agent and validate that the `nonstandard-cri-runtime` feature was detected by executing `agent status`

```bash
$ agent status
...
=============
Autodiscovery
=============
  Enabled Features
  ================
    cri
    kube_orchestratorexplorer
    kubernetes
    nonstandard-cri-runtime
...
```

4. Validate that container metrics work in the UI:

<img width="1125" height="478" alt="Screenshot 2025-11-24 at 3 42 10 PM" src="https://github.com/user-attachments/assets/d6ec023b-3f67-4c32-8c81-8ca34945802f" />
<img width="1310" height="423" alt="Screenshot 2025-11-24 at 3 40 51 PM" src="https://github.com/user-attachments/assets/79a40cc5-e9ee-45e9-b498-7e062a682d0a" />

### Additional Notes

Co-authored-by: justin.lesko <[email protected]>
1 parent facbf5f commit d2e41e0

File tree

6 files changed

+42
-2
lines changed

6 files changed

+42
-2
lines changed

pkg/config/env/environment_container_features.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,7 @@ const (
3737
PodResources Feature = "podresources"
3838
// NVML library present for GPU detection
3939
NVML Feature = "nvml"
40+
// NonstandardCRIRuntime is a fallback value for when customers supply a CRI compliant runtime via the
41+
// cri_socket_path configuration field
42+
NonstandardCRIRuntime = "nonstandard-cri-runtime"
4043
)

pkg/config/env/environment_containers.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ func init() {
5151
registerFeature(Podman)
5252
registerFeature(PodResources)
5353
registerFeature(NVML)
54+
registerFeature(NonstandardCRIRuntime)
5455
}
5556

5657
// IsAnyContainerFeaturePresent checks if any of the known container features is present
@@ -65,7 +66,8 @@ func IsAnyContainerFeaturePresent() bool {
6566
IsFeaturePresent(ECSManagedInstances) ||
6667
IsFeaturePresent(EKSFargate) ||
6768
IsFeaturePresent(CloudFoundry) ||
68-
IsFeaturePresent(Podman)
69+
IsFeaturePresent(Podman) ||
70+
IsFeaturePresent(NonstandardCRIRuntime)
6971
}
7072

7173
func detectContainerFeatures(features FeatureMap, cfg model.Reader) {
@@ -144,6 +146,8 @@ func detectCriRuntimes(features FeatureMap, cfg model.Reader) {
144146
mergeContainerdNamespaces(cfg)
145147
} else if strings.Contains(criSocket, "crio") {
146148
features[Crio] = struct{}{}
149+
} else {
150+
features[NonstandardCRIRuntime] = struct{}{}
147151
}
148152
}
149153
}

pkg/util/containers/metrics/cri/collector.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ func newCRICollector(cache *provider.Cache) (provider.CollectorMetadata, error)
5858
return provider.CollectorMetadata{
5959
ID: collectorID,
6060
Collectors: provider.CollectorCatalog{
61-
provider.NewRuntimeMetadata(string(provider.RuntimeNameCRIO), ""): provider.MakeCached(collectorID, cache, collectors),
61+
provider.NewRuntimeMetadata(string(provider.RuntimeNameCRIO), ""): provider.MakeCached(collectorID, cache, collectors),
62+
provider.NewRuntimeMetadata(string(provider.RuntimeNameCRINonstandard), ""): provider.MakeCached(collectorID, cache, collectors),
6263
},
6364
}, nil
6465
}

pkg/util/containers/metrics/kubelet/collector.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ func newKubeletCollector(_ *provider.Cache, wmeta workloadmeta.Component) (provi
8989
provider.NewRuntimeMetadata(string(provider.RuntimeNameContainerd), string(provider.RuntimeFlavorKata)): collectors,
9090
provider.NewRuntimeMetadata(string(provider.RuntimeNameCRIO), ""): collectors,
9191
provider.NewRuntimeMetadata(string(provider.RuntimeNameDocker), ""): collectors,
92+
provider.NewRuntimeMetadata(string(provider.RuntimeNameCRINonstandard), ""): collectors,
9293
},
9394
}, nil
9495
}

pkg/util/containers/metrics/provider/provider.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"sync"
1515

1616
workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def"
17+
"github.com/DataDog/datadog-agent/pkg/config/env"
18+
"github.com/DataDog/datadog-agent/pkg/util/log"
1719
"github.com/DataDog/datadog-agent/pkg/util/option"
1820
"github.com/DataDog/datadog-agent/pkg/util/retry"
1921
)
@@ -30,6 +32,7 @@ const (
3032
RuntimeNamePodman Runtime = "podman"
3133
RuntimeNameECSFargate Runtime = "ecsfargate"
3234
RuntimeNameECSManagedInstances Runtime = "ecsmanagedinstances"
35+
RuntimeNameCRINonstandard Runtime = "cri-nonstandard"
3336
)
3437

3538
var (
@@ -58,6 +61,7 @@ var (
5861
RuntimeNamePodman,
5962
RuntimeNameECSFargate,
6063
RuntimeNameECSManagedInstances,
64+
RuntimeNameCRINonstandard,
6165
}
6266

6367
// AllWindowsRuntimes lists all runtimes available on Windows
@@ -67,7 +71,11 @@ var (
6771
RuntimeNameContainerd,
6872
RuntimeNameECSFargate,
6973
RuntimeNameECSManagedInstances,
74+
RuntimeNameCRINonstandard,
7075
}
76+
77+
// nonstandardMetadata is used as a map key in GetCollector() when the NonstandardCRIRuntime feature is present
78+
nonstandardMetadata = NewRuntimeMetadata(string(RuntimeNameCRINonstandard), "")
7179
)
7280

7381
// RuntimeFlavor is a typed string for supported container runtime flavors
@@ -148,6 +156,17 @@ func (mp *GenericProvider) GetCollector(r RuntimeMetadata) Collector {
148156
return runtime
149157
}
150158

159+
// if the nonstandard runtime feature is present that means
160+
// the user supplied a runtime socket that does not map to any of our known
161+
// runtimes: containerd, cri-o
162+
if env.IsFeaturePresent(env.NonstandardCRIRuntime) {
163+
log.Debugf("Overriding collector runtime from %s to %s", r.String(), nonstandardMetadata.String())
164+
165+
if runtime, found := mp.collectors[nonstandardMetadata]; found {
166+
return runtime
167+
}
168+
}
169+
151170
return nil
152171
}
153172

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# The sections from every release note are combined when the
2+
# CHANGELOG.rst is rendered. So the text needs to be worded so that
3+
# it does not depend on any information only available in another
4+
# section. This may mean repeating some details, but each section
5+
# must be readable independently of the other.
6+
#
7+
# Each section note must be formatted as reStructuredText.
8+
---
9+
enhancements:
10+
- |
11+
Add container metric support for any CRI compliant runtime specified in
12+
the `cri_socket_path` configuration.

0 commit comments

Comments
 (0)