Skip to content

Commit e1e2e73

Browse files
authored
Allow HPA to work when Inference is enabled (#4247)
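Problem: When the Inference Extension was enabled, the additional inference container (endpoint-picker-shim) was added to the Pod without any resource specifications. The Horizontal Pod Autoscaler (HPA) can only compute resource utilization for a Pod when every container in it declares the relevant resources, so autoscaling did not work. Solution: Apply the resource specifications configured for the NGINX container to the inference container as well.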
1 parent b960d89 commit e1e2e73

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

internal/controller/provisioner/objects.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1019,6 +1019,7 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec(
10191019
},
10201020
}
10211021

1022+
var containerResources corev1.ResourceRequirements
10221023
if nProxyCfg != nil && nProxyCfg.Kubernetes != nil {
10231024
var podSpec *ngfAPIv1alpha2.PodSpec
10241025
var containerSpec *ngfAPIv1alpha2.ContainerSpec
@@ -1042,7 +1043,8 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec(
10421043
if containerSpec != nil {
10431044
container := spec.Spec.Containers[0]
10441045
if containerSpec.Resources != nil {
1045-
container.Resources = *containerSpec.Resources
1046+
containerResources = *containerSpec.Resources
1047+
container.Resources = containerResources
10461048
}
10471049
container.Lifecycle = containerSpec.Lifecycle
10481050
container.VolumeMounts = append(container.VolumeMounts, containerSpec.VolumeMounts...)
@@ -1174,6 +1176,7 @@ func (p *NginxProvisioner) buildNginxPodTemplateSpec(
11741176
Image: p.cfg.GatewayPodConfig.Image,
11751177
ImagePullPolicy: pullPolicy,
11761178
Command: command,
1179+
Resources: containerResources,
11771180
SecurityContext: &corev1.SecurityContext{
11781181
AllowPrivilegeEscalation: helpers.GetPointer(false),
11791182
Capabilities: &corev1.Capabilities{

internal/controller/provisioner/objects_test.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1883,7 +1883,20 @@ func TestBuildNginxResourceObjects_InferenceExtension(t *testing.T) {
18831883
},
18841884
}
18851885

1886-
objects, err := provisioner.buildNginxResourceObjects("gw-nginx", gateway, &graph.EffectiveNginxProxy{})
1886+
npCfg := &graph.EffectiveNginxProxy{
1887+
Kubernetes: &ngfAPIv1alpha2.KubernetesSpec{
1888+
Deployment: &ngfAPIv1alpha2.DeploymentSpec{
1889+
Container: ngfAPIv1alpha2.ContainerSpec{
1890+
Resources: &corev1.ResourceRequirements{
1891+
Limits: corev1.ResourceList{
1892+
corev1.ResourceCPU: resource.MustParse("500m"),
1893+
},
1894+
},
1895+
},
1896+
},
1897+
},
1898+
}
1899+
objects, err := provisioner.buildNginxResourceObjects("gw-nginx", gateway, npCfg)
18871900
g.Expect(err).ToNot(HaveOccurred())
18881901

18891902
// Find the deployment object
@@ -1907,4 +1920,5 @@ func TestBuildNginxResourceObjects_InferenceExtension(t *testing.T) {
19071920
g.Expect(containers).To(HaveLen(2))
19081921
g.Expect(containers[1].Name).To(Equal("endpoint-picker-shim"))
19091922
g.Expect(containers[1].Command).To(Equal(expectedCommands))
1923+
g.Expect(containers[1].Resources.Limits).To(HaveKeyWithValue(corev1.ResourceCPU, resource.MustParse("500m")))
19101924
}

0 commit comments

Comments
 (0)