Skip to content

Commit 82debf0

Browse files
committed
Fix data race in PreTrainedTokenizer
1 parent 78da20f commit 82debf0

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

Sources/Tokenizers/Tokenizer.swift

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -294,6 +294,9 @@ public class PreTrainedTokenizer: @unchecked Sendable, Tokenizer {
294294
/// Cache for compiled Jinja templates keyed by their literal template string
295295
private var compiledChatTemplateCache: [String: Template] = [:]
296296

297+
/// Lock to protect the compiled chat template cache from concurrent access
298+
private let cacheLock = NSLock()
299+
297300
public required init(tokenizerConfig: Config, tokenizerData: Config, strict: Bool = true) throws {
298301
var addedTokens: [String: Int] = [:]
299302
var specialTokens: [String: Int] = [:]
@@ -341,10 +344,26 @@ public class PreTrainedTokenizer: @unchecked Sendable, Tokenizer {
341344
}
342345

343346
/// Returns the compiled template for `templateString`, reusing a cached instance when one exists.
///
/// The cache is only read or written while `cacheLock` is held. Compilation itself runs
/// with the lock released, so two callers may compile the same string at the same time;
/// whichever entry is stored first is the one every caller receives.
///
/// - Parameter templateString: The literal template source, also used as the cache key.
/// - Returns: The cached template, or a newly compiled one that has just been cached.
/// - Throws: Whatever `Template(_:)` throws when `templateString` cannot be compiled.
private func compiledTemplate(for templateString: String) throws -> Template {
    // Scoped lookup: the lock is released as soon as the helper returns.
    func cachedTemplate() -> Template? {
        cacheLock.lock()
        defer { cacheLock.unlock() }
        return compiledChatTemplateCache[templateString]
    }

    if let hit = cachedTemplate() {
        return hit
    }

    // Compile without holding the lock so other callers are not blocked meanwhile.
    let fresh = try Template(templateString)

    cacheLock.lock()
    defer { cacheLock.unlock() }

    // Another caller may have stored an entry for this key while we were compiling;
    // if so, hand back that entry and discard ours.
    guard let winner = compiledChatTemplateCache[templateString] else {
        compiledChatTemplateCache[templateString] = fresh
        return fresh
    }
    return winner
}

0 commit comments

Comments
 (0)