Skip to content

Commit 27bbd04

Browse files
fix: metric submission failure should keep metrics for the next attempt (#130)
* fix: metric submission failure should keep metrics for the next attempt * Try to make tests more resilient
1 parent 2a7f7ee commit 27bbd04

File tree

6 files changed

+106
-15
lines changed

6 files changed

+106
-15
lines changed

unleashandroidsdk/src/main/java/io/getunleash/android/metrics/MetricsBucket.kt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ data class Bucket(
1818
)
1919

2020
interface UnleashMetricsBucket {
21-
fun count(featureName: String, enabled: Boolean): Boolean
22-
fun countVariant(featureName: String, variant: Variant): Variant
21+
fun count(featureName: String, enabled: Boolean, increment: Int = 1): Boolean
22+
fun countVariant(featureName: String, variant: Variant, increment: Int = 1): Variant
2323
fun isEmpty(): Boolean
2424
}
2525

@@ -30,14 +30,14 @@ data class CountBucket(
3030
val variants: ConcurrentHashMap<Pair<String, String>, AtomicInteger> = ConcurrentHashMap()
3131
): UnleashMetricsBucket {
3232

33-
override fun count(featureName: String, enabled: Boolean): Boolean {
33+
override fun count(featureName: String, enabled: Boolean, increment: Int): Boolean {
3434
(if (enabled) yes else no)
35-
.getOrPut(featureName) { AtomicInteger(0) }.incrementAndGet()
35+
.getOrPut(featureName) { AtomicInteger(0) }.addAndGet(increment)
3636
return enabled
3737
}
3838

39-
override fun countVariant(featureName: String, variant: Variant): Variant {
40-
variants.getOrPut(Pair(featureName, variant.name)) { AtomicInteger(0) }.incrementAndGet()
39+
override fun countVariant(featureName: String, variant: Variant, increment: Int): Variant {
40+
variants.getOrPut(Pair(featureName, variant.name)) { AtomicInteger(0) }.addAndGet(increment)
4141
return variant
4242
}
4343

unleashandroidsdk/src/main/java/io/getunleash/android/metrics/MetricsReporter.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ package io.getunleash.android.metrics
22

33
interface MetricsReporter {
44

5-
suspend fun sendMetrics()
5+
suspend fun sendMetrics(onComplete: ((Result<Unit>) -> Unit)? = null)
66
}

unleashandroidsdk/src/main/java/io/getunleash/android/metrics/MetricsSender.kt

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import okhttp3.RequestBody.Companion.toRequestBody
1616
import okhttp3.Response
1717
import java.io.IOException
1818
import java.util.Date
19+
import java.util.concurrent.atomic.AtomicBoolean
1920
import java.util.concurrent.TimeUnit
2021

2122
class MetricsSender(
@@ -30,14 +31,15 @@ class MetricsSender(
3031
private val metricsUrl = config.proxyUrl?.toHttpUrl()?.newBuilder()?.addPathSegment("client")
3132
?.addPathSegment("metrics")?.build()
3233
private var bucket: CountBucket = CountBucket(start = Date())
34+
private val inFlight = AtomicBoolean(false)
3335
private val throttler =
3436
Throttler(
3537
TimeUnit.MILLISECONDS.toSeconds(config.metricsStrategy.interval),
3638
longestAcceptableIntervalSeconds = 300,
3739
metricsUrl.toString()
3840
)
3941

40-
override suspend fun sendMetrics() {
42+
override suspend fun sendMetrics(onComplete: ((Result<Unit>) -> Unit)?) {
4143
if (metricsUrl == null) {
4244
Log.d(TAG, "No proxy URL configured, skipping metrics reporting")
4345
return
@@ -46,12 +48,16 @@ class MetricsSender(
4648
Log.d(TAG, "No metrics to report")
4749
return
4850
}
51+
if (!inFlight.compareAndSet(false, true)) {
52+
Log.d(TAG, "Metrics report already in-flight, skipping this send")
53+
return
54+
}
4955
if (throttler.performAction()) {
5056
val toReport = swapAndFreeze()
5157
val payload = MetricsPayload(
5258
appName = config.appName,
5359
instanceId = config.instanceId,
54-
bucket = toReport
60+
bucket = toReport.first
5561
)
5662
val request = Request.Builder()
5763
.headers(applicationHeaders.toHeaders())
@@ -61,6 +67,13 @@ class MetricsSender(
6167
).build()
6268
httpClient.newCall(request).enqueue(object : Callback {
6369
override fun onFailure(call: Call, e: IOException) {
70+
mergeBack(toReport.second)
71+
inFlight.set(false)
72+
try {
73+
onComplete?.invoke(Result.failure(e))
74+
} catch (t: Throwable) {
75+
Log.w(TAG, "onComplete callback threw", t)
76+
}
6477
Log.i(TAG, "Failed to report metrics for interval", e)
6578
}
6679

@@ -72,17 +85,37 @@ class MetricsSender(
7285
throttler.handle(response.code)
7386
response.body.use { // Need to consume body to ensure we don't keep connection open
7487
}
88+
inFlight.set(false)
89+
try {
90+
onComplete?.invoke(Result.success(Unit))
91+
} catch (t: Throwable) {
92+
Log.w(TAG, "onComplete callback threw", t)
93+
}
7594
}
7695
})
7796
} else {
7897
throttler.skipped()
98+
inFlight.set(false)
7999
}
80100
}
81101

82-
private fun swapAndFreeze(): Bucket {
83-
val bucketRef = bucket
102+
private fun swapAndFreeze(): Pair<Bucket, CountBucket> {
103+
val snapshot = bucket.copy()
84104
bucket = CountBucket(start = Date())
85-
return bucketRef.copy().toBucket(bucket.start)
105+
return Pair(snapshot.toBucket(bucket.start), snapshot)
106+
}
107+
108+
// Note: this does not maintain the initial start time of the snapshot
109+
private fun mergeBack(snapshot: CountBucket) {
110+
for ((feature, count) in snapshot.yes) {
111+
bucket.count(feature, true, count.get())
112+
}
113+
for ((feature, count) in snapshot.no) {
114+
bucket.count(feature, false, count.get())
115+
}
116+
for ((pair, count) in snapshot.variants) {
117+
bucket.countVariant(pair.first, Variant(pair.second), count.get())
118+
}
86119
}
87120

88121
override fun count(featureName: String, enabled: Boolean): Boolean {

unleashandroidsdk/src/main/java/io/getunleash/android/metrics/NoOpMetrics.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ class NoOpMetrics: MetricsHandler {
1111
return variant
1212
}
1313

14-
override suspend fun sendMetrics() {
14+
override suspend fun sendMetrics(onComplete: ((Result<Unit>) -> Unit)?) {
1515
}
1616
}

unleashandroidsdk/src/test/java/io/getunleash/android/DefaultUnleashTest.kt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,9 +403,13 @@ class DefaultUnleashTest : BaseTest() {
403403

404404
// change context to force a refresh
405405
unleash.setContext(UnleashContext(userId = "2"))
406-
assertThat(togglesChecked).isEqualTo(1)
406+
await().atMost(1, TimeUnit.SECONDS).until {
407+
togglesChecked == 1
408+
}
407409
unleash.setContext(UnleashContext(userId = "3"))
408-
assertThat(togglesChecked).isEqualTo(2)
410+
await().atMost(1, TimeUnit.SECONDS).until {
411+
togglesChecked == 2
412+
}
409413
}
410414

411415
@Test

unleashandroidsdk/src/test/java/io/getunleash/android/metrics/MetricsSenderTest.kt

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,12 @@ import org.junit.Test
1414
import org.mockito.Mockito.mock
1515
import java.util.concurrent.TimeUnit
1616
import net.javacrumbs.jsonunit.assertj.assertThatJson
17+
import okhttp3.mockwebserver.MockResponse
18+
import okhttp3.mockwebserver.SocketPolicy
19+
import org.awaitility.Awaitility.await
20+
import java.math.BigDecimal
1721
import java.math.BigDecimal.valueOf
22+
import java.util.concurrent.CountDownLatch
1823

1924
class MetricsSenderTest : BaseTest() {
2025
var server: MockWebServer = MockWebServer()
@@ -91,4 +96,53 @@ class MetricsSenderTest : BaseTest() {
9196
}
9297
}
9398
}
99+
100+
@Test
101+
fun `failed send merges metrics back and next successful send includes both old and new`() = runTest {
102+
val config = configBuilder.build()
103+
val httpClient = ClientBuilder(config, mock(Context::class.java)).build("test", config.metricsStrategy)
104+
val metricsSender = MetricsSender(config, httpClient)
105+
106+
// Initial metrics that will be part of the failed request
107+
metricsSender.count("featureA", true)
108+
metricsSender.count("featureB", true)
109+
110+
val latch = CountDownLatch(1)
111+
var calls = 0
112+
// Make the server drop the connection to simulate a network failure (500, 429, etc. would throttle further requests)
113+
server.enqueue(MockResponse().setSocketPolicy(SocketPolicy.DISCONNECT_AT_START))
114+
metricsSender.sendMetrics { calls++; latch.countDown() }
115+
metricsSender.count("featureA", true) // in between the failed send and the retry, add another metric
116+
metricsSender.sendMetrics { calls++ } // This should be skipped since a send is in-flight
117+
metricsSender.sendMetrics { calls++ } // This should be skipped since a send is in-flight
118+
119+
// Ensure the server received the (failed) request (may be disconnected)
120+
server.takeRequest(1, TimeUnit.SECONDS)
121+
latch.await(1, TimeUnit.SECONDS)
122+
assertThat(calls).isEqualTo(1) // Only the first call should have been executed
123+
124+
// Add another metric after the failed send - should be merged with the snapshot
125+
metricsSender.count("featureB", true)
126+
127+
// Now enqueue a successful response for the retry/send that should contain both counts
128+
server.enqueue(MockResponse().setResponseCode(200))
129+
metricsSender.sendMetrics()
130+
131+
val request = server.takeRequest(1, TimeUnit.SECONDS)!!
132+
assertThat(request.method).isEqualTo("POST")
133+
assertThat(request.path).isEqualTo("/proxy/client/metrics")
134+
// The combined count for featureA should be 2 (one from failed snapshot + one new) and for featureB should be 1 (from failed snapshot)
135+
assertThatJson(request.body.readUtf8()) {
136+
node("bucket").apply {
137+
node("toggles").apply {
138+
node("featureA").apply {
139+
node("yes").isNumber().isEqualTo(BigDecimal(2))
140+
}
141+
node("featureB").apply {
142+
node("yes").isNumber().isEqualTo(BigDecimal(2))
143+
}
144+
}
145+
}
146+
}
147+
}
94148
}

0 commit comments

Comments
 (0)