Skip to content

Commit dc0cca4

Browse files
committed
more efficient rewrite fetching per request.
1 parent 3e9939f commit dc0cca4

File tree

9 files changed

+534
-162
lines changed

9 files changed

+534
-162
lines changed

apix/v1alpha2/inferencemodelrewrite_types.go

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,20 +57,25 @@ type InferenceModelRewriteSpec struct {
5757
// If multiple InferenceModelRewrite resources target the same
5858
// InferencePool, the controller will merge them based on precedence.
5959
//
60-
// **Timestamp Wins:** If two rules from different rewrites all matches,
61-
// the rule from the *oldest*
62-
// InferenceModelRewrite resource (determined by
63-
// metadata.creationTimestamp) will be used.
60+
// Across all rules specified on applicable rewrites, precedence MUST be
61+
// given to a rule with an "Exact" model match over a generic match
62+
// (a rule with an empty `matches` array).
63+
//
64+
// If ties still exist across multiple InferenceModelRewrite resources (e.g.
65+
// two rewrites both have an exact match for the same model), matching
66+
// precedence MUST be determined by the oldest resource based on
67+
// creation timestamp.
68+
//
69+
// If ties still exist within a single InferenceModelRewrite resource, the
70+
// FIRST matching rule (in list order) is used.
6471
// +required
6572
Rules []InferenceModelRewriteRule `json:"rules"`
6673
}
6774

6875
// InferenceModelRewriteRule defines the match criteria and corresponding action.
69-
//
70-
// A specific model name can only be matched by one rule across all
71-
// rules attached to the same InferencePool. If multiple rules attempt
72-
// to match the same model name, the oldest rule (by creationTimestamp)
73-
// will be the only one considered valid.
76+
// For details on how precedence is determined across multiple rules and
77+
// InferenceModelRewrite resources, see the "Precedence and Conflict Resolution"
78+
// section in InferenceModelRewriteSpec.
7479
type InferenceModelRewriteRule struct {
7580
// Matches defines the criteria for matching a request.
7681
// If multiple match criteria are specified, a request matches if

config/crd/bases/inference.networking.x-k8s.io_inferencemodelrewrites.yaml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,9 @@ spec:
7474
items:
7575
description: |-
7676
InferenceModelRewriteRule defines the match criteria and corresponding action.
77-
78-
A specific model name can only be matched by one rule across all
79-
rules attached to the same InferencePool. If multiple rules attempt
80-
to match the same model name, the oldest rule (by creationTimestamp)
81-
will be the only one considered valid.
77+
For details on how precedence is determined across multiple rules and
78+
InferenceModelRewrite resources, see the "Precedence and Conflict Resolution"
79+
section in InferenceModelRewriteSpec.
8280
properties:
8381
matches:
8482
items:

docs/proposals/1816-inferenceomodelrewrite/README.md

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,20 +64,25 @@ type InferenceModelRewriteSpec struct {
6464
// If multiple InferenceModelRewrite resources target the same
6565
// InferencePool, the controller will merge them based on precedence.
6666
//
67-
// **Timestamp Wins:** If two rules from different rewrite all matches,
68-
// the rule from the *oldest*
69-
// InferenceModelRewrite resource (determined by
70-
// metadata.creationTimestamp) will be used.
67+
// Across all rules specified on applicable rewrites, precedence MUST be
68+
// given to a rule with an "Exact" model match over a generic match
69+
// (a rule with an empty `matches` array).
70+
//
71+
// If ties still exist across multiple InferenceModelRewrite resources (e.g.
72+
// two rewrites both have an exact match for the same model), matching
73+
// precedence MUST be determined by the oldest resource based on
74+
// creation timestamp.
75+
//
76+
// If ties still exist within a single InferenceModelRewrite resource, the
77+
// FIRST matching rule (in list order) is used.
7178
// +required
7279
Rules []InferenceModelRewriteRule `json:"rules"`
7380
}
7481

7582
// InferenceModelRewriteRule defines the match criteria and corresponding action.
76-
//
77-
// A specific model name can only be matched by one rule across all
78-
// rewrites attached to the same InferencePool. If multiple rules attempt
79-
// to match the same model name, the oldest rule (by creationTimestamp)
80-
// will be the only one considered valid.
83+
// For details on how precedence is determined across multiple rules and
84+
// InferenceModelRewrite resources, see the "Precedence and Conflict Resolution"
85+
// section in InferenceModelRewriteSpec.
8186
type InferenceModelRewriteRule struct {
8287
// Matches defines the criteria for matching a request.
8388
// If multiple match criteria are specified, a request matches if

pkg/epp/controller/inferencemodelrewrite_reconciler_test.go

Lines changed: 51 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -41,79 +41,59 @@ import (
4141

4242
var (
4343
poolForRewrite = utiltest.MakeInferencePool("test-pool1").Namespace("ns1").ObjRef()
44-
rewrite1 = makeInferenceModelRewrite("rewrite1").
45-
Namespace(poolForRewrite.Namespace).
46-
PoolName(poolForRewrite.Name).
47-
CreationTimestamp(metav1.Unix(1000, 0)).
48-
ObjRef()
49-
rewrite1Pool2 = makeInferenceModelRewrite(rewrite1.Name).
50-
Namespace(rewrite1.Namespace).
51-
PoolName("test-pool2").
52-
CreationTimestamp(metav1.Unix(1001, 0)).
53-
ObjRef()
54-
rewrite1Updated = makeInferenceModelRewrite(rewrite1.Name).
55-
Namespace(rewrite1.Namespace).
56-
PoolName(poolForRewrite.Name).
57-
CreationTimestamp(metav1.Unix(1003, 0)).
58-
Rules([]v1alpha2.InferenceModelRewriteRule{{}}).
59-
ObjRef()
60-
rewrite1Deleted = makeInferenceModelRewrite(rewrite1.Name).
61-
Namespace(rewrite1.Namespace).
62-
PoolName(poolForRewrite.Name).
63-
CreationTimestamp(metav1.Unix(1004, 0)).
64-
DeletionTimestamp().
65-
ObjRef()
66-
rewrite2 = makeInferenceModelRewrite("rewrite2").
67-
Namespace(poolForRewrite.Namespace).
68-
PoolName(poolForRewrite.Name).
69-
CreationTimestamp(metav1.Unix(1000, 0)).
70-
ObjRef()
71-
)
72-
73-
type inferenceModelRewriteBuilder struct {
74-
*v1alpha2.InferenceModelRewrite
75-
}
76-
77-
func makeInferenceModelRewrite(name string) *inferenceModelRewriteBuilder {
78-
return &inferenceModelRewriteBuilder{
79-
&v1alpha2.InferenceModelRewrite{
80-
ObjectMeta: metav1.ObjectMeta{
81-
Name: name,
82-
},
44+
rewrite1 = &v1alpha2.InferenceModelRewrite{
45+
ObjectMeta: metav1.ObjectMeta{
46+
Name: "rewrite1",
47+
Namespace: poolForRewrite.Namespace,
48+
CreationTimestamp: metav1.Unix(1000, 0),
49+
},
50+
Spec: v1alpha2.InferenceModelRewriteSpec{
51+
PoolRef: &v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolForRewrite.Name)},
8352
},
8453
}
85-
}
86-
87-
func (b *inferenceModelRewriteBuilder) Namespace(ns string) *inferenceModelRewriteBuilder {
88-
b.ObjectMeta.Namespace = ns
89-
return b
90-
}
91-
92-
func (b *inferenceModelRewriteBuilder) PoolName(name string) *inferenceModelRewriteBuilder {
93-
b.Spec.PoolRef = &v1alpha2.PoolObjectReference{}
94-
b.Spec.PoolRef.Name = v1alpha2.ObjectName(name)
95-
return b
96-
}
97-
98-
func (b *inferenceModelRewriteBuilder) CreationTimestamp(t metav1.Time) *inferenceModelRewriteBuilder {
99-
b.ObjectMeta.CreationTimestamp = t
100-
return b
101-
}
102-
103-
func (b *inferenceModelRewriteBuilder) DeletionTimestamp() *inferenceModelRewriteBuilder {
104-
now := metav1.Now()
105-
b.ObjectMeta.DeletionTimestamp = &now
106-
return b
107-
}
108-
109-
func (b *inferenceModelRewriteBuilder) Rules(rules []v1alpha2.InferenceModelRewriteRule) *inferenceModelRewriteBuilder {
110-
b.Spec.Rules = rules
111-
return b
112-
}
113-
114-
func (b *inferenceModelRewriteBuilder) ObjRef() *v1alpha2.InferenceModelRewrite {
115-
return b.InferenceModelRewrite
116-
}
54+
rewrite1Pool2 = &v1alpha2.InferenceModelRewrite{
55+
ObjectMeta: metav1.ObjectMeta{
56+
Name: rewrite1.Name,
57+
Namespace: rewrite1.Namespace,
58+
CreationTimestamp: metav1.Unix(1001, 0),
59+
},
60+
Spec: v1alpha2.InferenceModelRewriteSpec{
61+
PoolRef: &v1alpha2.PoolObjectReference{Name: "test-pool2"},
62+
},
63+
}
64+
rewrite1Updated = &v1alpha2.InferenceModelRewrite{
65+
ObjectMeta: metav1.ObjectMeta{
66+
Name: rewrite1.Name,
67+
Namespace: rewrite1.Namespace,
68+
CreationTimestamp: metav1.Unix(1003, 0),
69+
},
70+
Spec: v1alpha2.InferenceModelRewriteSpec{
71+
PoolRef: &v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolForRewrite.Name)},
72+
Rules: []v1alpha2.InferenceModelRewriteRule{{}},
73+
},
74+
}
75+
rewrite1Deleted = &v1alpha2.InferenceModelRewrite{
76+
ObjectMeta: metav1.ObjectMeta{
77+
Name: rewrite1.Name,
78+
Namespace: rewrite1.Namespace,
79+
CreationTimestamp: metav1.Unix(1004, 0),
80+
DeletionTimestamp: &metav1.Time{Time: time.Now()},
81+
},
82+
Spec: v1alpha2.InferenceModelRewriteSpec{
83+
PoolRef: &v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolForRewrite.Name)},
84+
},
85+
}
86+
rewrite2 = &v1alpha2.InferenceModelRewrite{
87+
ObjectMeta: metav1.ObjectMeta{
88+
Name: "rewrite2",
89+
Namespace: poolForRewrite.Namespace,
90+
CreationTimestamp: metav1.Unix(1001, 0),
91+
},
92+
Spec: v1alpha2.InferenceModelRewriteSpec{
93+
PoolRef: &v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolForRewrite.Name)},
94+
},
95+
}
96+
)
11797

11898
func TestInferenceModelRewriteReconciler(t *testing.T) {
11999
tests := []struct {

pkg/epp/datastore/datastore.go

Lines changed: 45 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ type Datastore interface {
6363
// InferenceModelRewrite operations
6464
RewriteSet(infModelRewrite *v1alpha2.InferenceModelRewrite)
6565
RewriteDelete(namespacedName types.NamespacedName)
66+
RewriteGet(modelName string) *v1alpha2.InferenceModelRewriteRule
6667
RewriteGetAll() []*v1alpha2.InferenceModelRewrite
6768

6869
// PodList lists pods matching the given predicate.
@@ -77,9 +78,9 @@ type Datastore interface {
7778
func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory, modelServerMetricsPort int32) Datastore {
7879
store := &datastore{
7980
parentCtx: parentCtx,
80-
poolAndObjectivesMu: sync.RWMutex{},
81+
mu: sync.RWMutex{},
8182
objectives: make(map[string]*v1alpha2.InferenceObjective),
82-
rewrites: make(map[types.NamespacedName]*v1alpha2.InferenceModelRewrite),
83+
rewrites: NewModelRewriteStore(),
8384
pods: &sync.Map{},
8485
modelServerMetricsPort: modelServerMetricsPort,
8586
epf: epFactory,
@@ -90,13 +91,13 @@ func NewDatastore(parentCtx context.Context, epFactory datalayer.EndpointFactory
9091
type datastore struct {
9192
// parentCtx controls the lifecycle of the background metrics goroutines spawned by the datastore.
9293
parentCtx context.Context
93-
// poolAndObjectivesMu is used to synchronize access to pool and the objectives map.
94-
poolAndObjectivesMu sync.RWMutex
95-
pool *v1.InferencePool
94+
// mu is used to synchronize access to pool, objectives, and rewrites.
95+
mu sync.RWMutex
96+
pool *v1.InferencePool
9697
// key: InferenceObjective name, value: *InferenceObjective
9798
objectives map[string]*v1alpha2.InferenceObjective
98-
// key: types.NamespacedName, value: *v1alpha2.InferenceModelRewrite
99-
rewrites map[types.NamespacedName]*v1alpha2.InferenceModelRewrite
99+
// rewrites is the store for InferenceModelRewrite objects.
100+
rewrites *ModelRewriteStore
100101
// key: types.NamespacedName, value: backendmetrics.PodMetrics
101102
pods *sync.Map
102103
// modelServerMetricsPort metrics port from EPP command line argument
@@ -106,11 +107,11 @@ type datastore struct {
106107
}
107108

108109
func (ds *datastore) Clear() {
109-
ds.poolAndObjectivesMu.Lock()
110-
defer ds.poolAndObjectivesMu.Unlock()
110+
ds.mu.Lock()
111+
defer ds.mu.Unlock()
111112
ds.pool = nil
112113
ds.objectives = make(map[string]*v1alpha2.InferenceObjective)
113-
ds.rewrites = make(map[types.NamespacedName]*v1alpha2.InferenceModelRewrite)
114+
ds.rewrites = NewModelRewriteStore()
114115
// Stop all pod goroutines before clearing the pods map.
115116
ds.pods.Range(func(_, v any) bool {
116117
ds.epf.ReleaseEndpoint(v.(backendmetrics.PodMetrics))
@@ -126,8 +127,8 @@ func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, pool *v1
126127
return nil
127128
}
128129
logger := log.FromContext(ctx)
129-
ds.poolAndObjectivesMu.Lock()
130-
defer ds.poolAndObjectivesMu.Unlock()
130+
ds.mu.Lock()
131+
defer ds.mu.Unlock()
131132

132133
oldPool := ds.pool
133134
ds.pool = pool
@@ -148,23 +149,23 @@ func (ds *datastore) PoolSet(ctx context.Context, reader client.Reader, pool *v1
148149
}
149150

150151
func (ds *datastore) PoolGet() (*v1.InferencePool, error) {
151-
ds.poolAndObjectivesMu.RLock()
152-
defer ds.poolAndObjectivesMu.RUnlock()
152+
ds.mu.RLock()
153+
defer ds.mu.RUnlock()
153154
if !ds.PoolHasSynced() {
154155
return nil, errPoolNotSynced
155156
}
156157
return ds.pool, nil
157158
}
158159

159160
func (ds *datastore) PoolHasSynced() bool {
160-
ds.poolAndObjectivesMu.RLock()
161-
defer ds.poolAndObjectivesMu.RUnlock()
161+
ds.mu.RLock()
162+
defer ds.mu.RUnlock()
162163
return ds.pool != nil
163164
}
164165

165166
func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool {
166-
ds.poolAndObjectivesMu.RLock()
167-
defer ds.poolAndObjectivesMu.RUnlock()
167+
ds.mu.RLock()
168+
defer ds.mu.RUnlock()
168169
if ds.pool == nil {
169170
return false
170171
}
@@ -173,59 +174,57 @@ func (ds *datastore) PoolLabelsMatch(podLabels map[string]string) bool {
173174
return poolSelector.Matches(podSet)
174175
}
175176

177+
// /// InferenceObjective APIs ///
176178
func (ds *datastore) ObjectiveSet(infObjective *v1alpha2.InferenceObjective) {
177-
ds.poolAndObjectivesMu.Lock()
178-
defer ds.poolAndObjectivesMu.Unlock()
179-
// Set the objective.
179+
ds.mu.Lock()
180+
defer ds.mu.Unlock()
180181
ds.objectives[infObjective.Name] = infObjective
181182
}
182183

183184
func (ds *datastore) ObjectiveGet(objectiveName string) *v1alpha2.InferenceObjective {
184-
ds.poolAndObjectivesMu.RLock()
185-
defer ds.poolAndObjectivesMu.RUnlock()
186-
iObj, ok := ds.objectives[objectiveName]
187-
if !ok {
188-
return nil
189-
}
190-
return iObj
185+
ds.mu.RLock()
186+
defer ds.mu.RUnlock()
187+
return ds.objectives[objectiveName]
191188
}
192189

193190
func (ds *datastore) ObjectiveDelete(namespacedName types.NamespacedName) {
194-
ds.poolAndObjectivesMu.Lock()
195-
defer ds.poolAndObjectivesMu.Unlock()
191+
ds.mu.Lock()
192+
defer ds.mu.Unlock()
196193
delete(ds.objectives, namespacedName.Name)
197194
}
198195

199196
func (ds *datastore) ObjectiveGetAll() []*v1alpha2.InferenceObjective {
200-
ds.poolAndObjectivesMu.RLock()
201-
defer ds.poolAndObjectivesMu.RUnlock()
202-
res := []*v1alpha2.InferenceObjective{}
197+
ds.mu.RLock()
198+
defer ds.mu.RUnlock()
199+
res := make([]*v1alpha2.InferenceObjective, 0, len(ds.objectives))
203200
for _, v := range ds.objectives {
204201
res = append(res, v)
205202
}
206203
return res
207204
}
208205

209206
func (ds *datastore) RewriteSet(infModelRewrite *v1alpha2.InferenceModelRewrite) {
210-
ds.poolAndObjectivesMu.Lock()
211-
defer ds.poolAndObjectivesMu.Unlock()
212-
ds.rewrites[types.NamespacedName{Name: infModelRewrite.Name, Namespace: infModelRewrite.Namespace}] = infModelRewrite
207+
ds.mu.Lock()
208+
defer ds.mu.Unlock()
209+
ds.rewrites.Set(infModelRewrite)
213210
}
214211

215212
func (ds *datastore) RewriteDelete(namespacedName types.NamespacedName) {
216-
ds.poolAndObjectivesMu.Lock()
217-
defer ds.poolAndObjectivesMu.Unlock()
218-
delete(ds.rewrites, namespacedName)
213+
ds.mu.Lock()
214+
defer ds.mu.Unlock()
215+
ds.rewrites.Delete(namespacedName)
216+
}
217+
218+
func (ds *datastore) RewriteGet(modelName string) *v1alpha2.InferenceModelRewriteRule {
219+
ds.mu.RLock()
220+
defer ds.mu.RUnlock()
221+
return ds.rewrites.GetRule(modelName)
219222
}
220223

221224
func (ds *datastore) RewriteGetAll() []*v1alpha2.InferenceModelRewrite {
222-
ds.poolAndObjectivesMu.RLock()
223-
defer ds.poolAndObjectivesMu.RUnlock()
224-
res := []*v1alpha2.InferenceModelRewrite{}
225-
for _, v := range ds.rewrites {
226-
res = append(res, v)
227-
}
228-
return res
225+
ds.mu.RLock()
226+
defer ds.mu.RUnlock()
227+
return ds.rewrites.GetAll()
229228
}
230229

231230
// /// Pods/endpoints APIs ///

0 commit comments

Comments
 (0)