From 1f3ceb7253c0597a4647385f45d0246612fc7976 Mon Sep 17 00:00:00 2001 From: Elliott Sims Date: Thu, 23 Jan 2025 16:49:39 -0700 Subject: [PATCH] CASSANDRA-20147: Fix Semaphore permit count overflow under sustained high write rate In Batch commit mode, if multiple writes arrive during a single commitlog flush, it will release the haveWork semaphore more times than it's acquired. If this happens often enough without an idle period, it will eventually overflow. So, every time we acquire(), the permit count should be reset to 0. This is similar to how it worked in 3.0, but with more places where it's acquired. In theory, this leaves a potential race, but only if 2 billion writes arrive within a single commitlog flush interval. Without this change, I believe the flusher loop would also run without waiting for a while during idle periods after sustained high load. patch by Elliott Sims (elliott@backblaze.com); reviewed by TBD for CASSANDRA-20147 --- .../cassandra/db/commitlog/AbstractCommitLogService.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java b/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java index cd3eb56105d6..166c42915d9e 100644 --- a/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java +++ b/src/java/org/apache/cassandra/db/commitlog/AbstractCommitLogService.java @@ -203,6 +203,7 @@ public void run(Interruptible.State state) throws InterruptedException if (markerIntervalNanos <= 0) { haveWork.acquire(1); + haveWork.drain(); } else { @@ -213,6 +214,7 @@ public void run(Interruptible.State state) throws InterruptedException long wakeUpAt = pollStarted + markerIntervalNanos; if (wakeUpAt > now) haveWork.tryAcquireUntil(1, wakeUpAt); + haveWork.drain(); } } catch (Throwable t) @@ -221,6 +223,7 @@ public void run(Interruptible.State state) throws InterruptedException throw new TerminateException(); else // sleep for full poll-interval after an error, 
so we don't spam the log file haveWork.tryAcquire(1, markerIntervalNanos, NANOSECONDS); + haveWork.drain(); } }