Skip to content

Commit 5cb05ec

Browse files
committed
ghreconciler: handle grpc ResourceExhausted with jitter
Signed-off-by: Nghia Tran <[email protected]>
1 parent a8a15eb commit 5cb05ec

File tree

2 files changed

+112
-3
lines changed

2 files changed

+112
-3
lines changed

pkg/githubreconciler/reconciler.go

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@ import (
99
"context"
1010
"errors"
1111
"fmt"
12+
"math/rand"
1213
"time"
1314

1415
"github.com/google/go-github/v75/github"
16+
"google.golang.org/grpc/codes"
17+
"google.golang.org/grpc/status"
1518

1619
"github.com/chainguard-dev/clog"
1720
"github.com/chainguard-dev/terraform-infra-common/pkg/workqueue"
@@ -56,6 +59,16 @@ func (r *Resource) String() string {
5659
return fmt.Sprintf("%s/%s#%d", r.Owner, r.Repo, r.Number)
5760
}
5861

62+
// addJitter adds random jitter to a duration to avoid thundering herd.
63+
// Jitter is 0% to +10% of the base duration.
64+
//
65+
//nolint:gosec // Using weak random for jitter is fine, not cryptographic
66+
func addJitter(d time.Duration) time.Duration {
67+
// Add jitter between 0% and +10%
68+
jitter := time.Duration(rand.Int63n(int64(d / 10)))
69+
return d + jitter
70+
}
71+
5972
// Reconciler manages the reconciliation of GitHub resources.
6073
type Reconciler struct {
6174
workqueue.UnimplementedWorkqueueServiceServer
@@ -149,9 +162,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, url string) error {
149162
if errors.As(err, &rateLimitErr) {
150163
// Calculate duration until rate limit resets
151164
resetTime := rateLimitErr.Rate.Reset.Time
165+
delay := addJitter(time.Until(resetTime))
152166
clog.FromContext(ctx).With("reset_at", resetTime).
153167
Warn("Rate limited, requeueing after rate limit reset")
154-
return workqueue.RequeueAfter(time.Until(resetTime))
168+
return workqueue.RequeueAfter(delay)
155169
}
156170

157171
// Check if it's an abuse rate limit error
@@ -162,9 +176,19 @@ func (r *Reconciler) Reconcile(ctx context.Context, url string) error {
162176
if abuseRateLimitErr.RetryAfter != nil {
163177
retryAfter = *abuseRateLimitErr.RetryAfter
164178
}
165-
clog.FromContext(ctx).With("retry_after", retryAfter).
179+
delay := addJitter(retryAfter)
180+
clog.FromContext(ctx).With("retry_after", delay).
166181
Warn("Abuse rate limit detected, requeueing after retry period")
167-
return workqueue.RequeueAfter(retryAfter)
182+
return workqueue.RequeueAfter(delay)
183+
}
184+
185+
// Check if it's a gRPC ResourceExhausted error
186+
if status.Code(err) == codes.ResourceExhausted {
187+
// Resource exhausted - use a conservative 1 minute retry
188+
delay := addJitter(time.Minute)
189+
clog.FromContext(ctx).With("retry_after", delay).
190+
Warn("gRPC ResourceExhausted detected, requeueing after retry period")
191+
return workqueue.RequeueAfter(delay)
168192
}
169193
}
170194
return err

pkg/githubreconciler/reconciler_test.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,14 @@ import (
1111
"fmt"
1212
"strings"
1313
"testing"
14+
"time"
1415

1516
"github.com/google/go-github/v75/github"
1617
"golang.org/x/oauth2"
18+
"google.golang.org/grpc/codes"
19+
"google.golang.org/grpc/status"
20+
21+
"github.com/chainguard-dev/terraform-infra-common/pkg/workqueue"
1722
)
1823

1924
// mockTokenSourceFunc returns a mock token source
@@ -245,3 +250,83 @@ func TestReconciler_GetStateManager(t *testing.T) {
245250
func contains(s, substr string) bool {
246251
return strings.Contains(s, substr)
247252
}
253+
254+
func TestReconciler_RateLimitHandling(t *testing.T) {
255+
tests := []struct {
256+
name string
257+
reconcilerError error
258+
wantRequeue bool
259+
wantRequeueAfter time.Duration
260+
wantErrContains string
261+
}{{
262+
name: "gRPC ResourceExhausted error triggers requeue",
263+
reconcilerError: status.Error(codes.ResourceExhausted, "resource exhausted"),
264+
wantRequeue: true,
265+
wantRequeueAfter: time.Minute,
266+
}, {
267+
name: "wrapped gRPC ResourceExhausted error triggers requeue",
268+
reconcilerError: fmt.Errorf("failed to call API: %w", status.Error(codes.ResourceExhausted, "quota exceeded")),
269+
wantRequeue: true,
270+
wantRequeueAfter: time.Minute,
271+
}, {
272+
name: "GitHub RateLimitError triggers requeue",
273+
reconcilerError: &github.RateLimitError{Rate: github.Rate{Reset: github.Timestamp{Time: time.Now().Add(5 * time.Minute)}}},
274+
wantRequeue: true,
275+
wantRequeueAfter: 5 * time.Minute,
276+
}, {
277+
name: "GitHub AbuseRateLimitError with RetryAfter triggers requeue",
278+
reconcilerError: &github.AbuseRateLimitError{RetryAfter: ptrDuration(2 * time.Minute)},
279+
wantRequeue: true,
280+
wantRequeueAfter: 2 * time.Minute,
281+
}, {
282+
name: "GitHub AbuseRateLimitError without RetryAfter uses default",
283+
reconcilerError: &github.AbuseRateLimitError{},
284+
wantRequeue: true,
285+
wantRequeueAfter: time.Minute,
286+
}, {
287+
name: "non-rate-limit error does not trigger requeue",
288+
reconcilerError: errors.New("regular error"),
289+
wantRequeue: false,
290+
wantErrContains: "regular error",
291+
}}
292+
293+
for _, tt := range tests {
294+
t.Run(tt.name, func(t *testing.T) {
295+
ctx := context.Background()
296+
cc := NewClientCache(mockTokenSourceFunc)
297+
298+
r := NewReconciler(cc, WithReconciler(func(_ context.Context, _ *Resource, _ *github.Client) error {
299+
return tt.reconcilerError
300+
}))
301+
302+
err := r.Reconcile(ctx, "https://github.com/owner/repo/issues/123")
303+
304+
if tt.wantRequeue {
305+
delay, ok := workqueue.GetRequeueDelay(err)
306+
if !ok {
307+
t.Errorf("Expected requeue error, got = %v", err)
308+
}
309+
// Jitter adds 0% to +10% to the base delay
310+
minDelay := tt.wantRequeueAfter
311+
maxDelay := tt.wantRequeueAfter + (tt.wantRequeueAfter / 10)
312+
if delay < minDelay || delay > maxDelay {
313+
t.Errorf("Requeue delay = %v, want between %v and %v", delay, minDelay, maxDelay)
314+
}
315+
} else {
316+
if err == nil {
317+
t.Error("Expected error, got nil")
318+
}
319+
if _, ok := workqueue.GetRequeueDelay(err); ok {
320+
t.Errorf("Did not expect requeue error, got = %v", err)
321+
}
322+
if tt.wantErrContains != "" && !contains(err.Error(), tt.wantErrContains) {
323+
t.Errorf("Error = %v, want error containing %v", err, tt.wantErrContains)
324+
}
325+
}
326+
})
327+
}
328+
}
329+
330+
func ptrDuration(d time.Duration) *time.Duration {
331+
return &d
332+
}

0 commit comments

Comments
 (0)