From 4b4008e6ce022248a76f8842ed7b7ab0e34be73f Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Wed, 10 Sep 2025 03:20:10 -0700 Subject: [PATCH 01/12] Disable VSCode automatic generation of launch configuration --- .vscode/settings.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 9b00789e..c263d4a0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -35,6 +35,6 @@ "swift.swiftEnvironmentVariables": { "DEVELOPER_DIR": "/Applications/Xcode.app" }, - // Disable automatic generation of launch `launch.json` + // Disable automatic generation of launch configuration (`launch.json`) "swift.autoGenerateLaunchConfigurations": false } \ No newline at end of file From fb634a4ba834f7bac70be0571b71cd892a320624 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Wed, 10 Sep 2025 02:47:18 -0700 Subject: [PATCH 02/12] Add .swift-format configuration file --- .swift-format | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .swift-format diff --git a/.swift-format b/.swift-format new file mode 100644 index 00000000..7e6f18f2 --- /dev/null +++ b/.swift-format @@ -0,0 +1,77 @@ +{ + "fileScopedDeclarationPrivacy": { + "accessLevel": "private" + }, + "indentConditionalCompilationBlocks": false, + "indentSwitchCaseLabels": false, + "indentation": { + "spaces": 4 + }, + "lineBreakAroundMultilineExpressionChainComponents": false, + "lineBreakBeforeControlFlowKeywords": false, + "lineBreakBeforeEachArgument": false, + "lineBreakBeforeEachGenericRequirement": false, + "lineBreakBetweenDeclarationAttributes": false, + "lineLength": 10000, + "maximumBlankLines": 1, + "multiElementCollectionTrailingCommas": true, + "noAssignmentInExpressions": { + "allowedFunctions": [ + "XCTAssertNoThrow" + ] + }, + "prioritizeKeepingFunctionOutputTogether": false, + "reflowMultilineStringLiterals": { + "never": {} + }, + "respectsExistingLineBreaks": true, + "rules": { + "AllPublicDeclarationsHaveDocumentation": false, + "AlwaysUseLiteralForEmptyCollectionInit": false, + "AlwaysUseLowerCamelCase": false, + "AmbiguousTrailingClosureOverload": true, + "AvoidRetroactiveConformances": true, + "BeginDocumentationCommentWithOneLineSummary": false, + "DoNotUseSemicolons": false, + "DontRepeatTypeInStaticProperties": true, + "FileScopedDeclarationPrivacy": true, + "FullyIndirectEnum": true, + "GroupNumericLiterals": false, + "IdentifiersMustBeASCII": true, + "NeverForceUnwrap": false, + "NeverUseForceTry": false, + "NeverUseImplicitlyUnwrappedOptionals": false, + "NoAccessLevelOnExtensionDeclaration": false, + "NoAssignmentInExpressions": true, + "NoBlockComments": true, + "NoCasesWithOnlyFallthrough": true, + "NoEmptyLinesOpeningClosingBraces": false, + "NoEmptyTrailingClosureParentheses": true, + "NoLabelsInCasePatterns": true, + "NoLeadingUnderscores": false, + "NoParensAroundConditions": true, + "NoPlaygroundLiterals": true, + "NoVoidReturnOnFunctionSignature": true, + "OmitExplicitReturns": false, + "OneCasePerLine": true, + "OneVariableDeclarationPerLine": true, + "OnlyOneTrailingClosureArgument": true, + "OrderedImports": true, + "ReplaceForEachWithForLoop": true, + "ReturnVoidInsteadOfEmptyTuple": true, + "TypeNamesShouldBeCapitalized": true, + "UseEarlyExits": false, + "UseExplicitNilCheckInConditions": true, + "UseLetInEveryBoundCaseVariable": false, + "UseShorthandTypeNames": true, + "UseSingleLinePropertyGetter": true, + "UseSynthesizedInitializer": true, + "UseTripleSlashForDocumentationComments": false, + "UseWhereClausesInForLoops": false, + "ValidateDocumentationComments": true + }, + "spacesAroundRangeFormationOperators": false, + "spacesBeforeEndOfLineComments": 1, + "tabWidth": 8, + "version": 1 +} \ No newline at end of file From 1a9164566f1f856c3347eacb5bd88b486c56e664 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Wed, 10 Sep 2025 03:26:56 -0700 Subject: [PATCH 03/12] Remove SwiftFormat configuration --- .swiftformat | 87 ------------------------------------------- .vscode/settings.json | 4 -- 2 files changed, 91 deletions(-) delete mode 100644 .swiftformat diff --git a/.swiftformat b/.swiftformat deleted file mode 100644 index 95eff411..00000000 --- a/.swiftformat +++ /dev/null @@ -1,87 +0,0 @@ ---swiftversion 5.9 ---acronyms ID,URL,UUID ---allman false ---anonymousforeach convert ---assetliterals visual-width ---asynccapturing ---beforemarks ---binarygrouping 4,8 ---categorymark "MARK: %c" ---classthreshold 0 ---closingparen balanced ---closurevoid remove ---commas always ---conflictmarkers reject ---decimalgrouping ignore ---elseposition same-line ---emptybraces spaced ---enumthreshold 0 ---exponentcase lowercase ---exponentgrouping disabled ---extensionacl on-extension ---extensionlength 0 ---extensionmark "MARK: - %t + %c" ---fractiongrouping disabled ---fragment false ---funcattributes preserve ---generictypes ---groupedextension "MARK: %c" ---guardelse auto ---header ignore ---hexgrouping 4,8 ---hexliteralcase uppercase ---ifdef no-indent ---importgrouping alpha ---indent 4 ---indentcase false ---indentstrings false ---lifecycle ---lineaftermarks true ---linebreaks lf ---markcategories true ---markextensions always ---marktypes always ---maxwidth none ---modifierorder ---nevertrailing ---nospaceoperators ---nowrapoperators ---octalgrouping 4,8 ---onelineforeach ignore ---operatorfunc spaced ---organizetypes actor,class,enum,struct ---patternlet hoist ---ranges no-space ---redundanttype infer-locals-only ---self remove ---selfrequired ---semicolons inline ---shortoptionals always ---smarttabs enabled ---someany true ---stripunusedargs unnamed-only ---structthreshold 0 ---tabwidth unspecified ---throwcapturing ---trailingclosures ---typeattributes preserve ---typeblanklines remove ---typemark "MARK: - %t" ---varattributes preserve ---voidtype void ---wraparguments preserve ---wrapcollections preserve ---wrapconditions preserve ---wrapeffects preserve ---wrapenumcases always ---wrapparameters preserve ---wrapreturntype preserve ---wrapternary default ---wraptypealiases preserve ---xcodeindentation disabled ---yodaswap always ---disable blankLineAfterImports,unusedArguments ---enable docComments ---disable enumnamespaces ---trimwhitespace always ---disable preferKeyPath diff --git a/.vscode/settings.json b/.vscode/settings.json index c263d4a0..d5dba732 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,8 +1,4 @@ { - // SwiftFormat configuration to match CI - "swiftformat.configSearchPaths": [ - ".swiftformat" - ], // Swift language settings "swift.path": "/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin", "swift.buildPath": ".build", From b973a3705683a7476dab09729262effed3f81084 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Wed, 10 Sep 2025 03:27:10 -0700 Subject: [PATCH 04/12] Set file association for .swift-format --- .vscode/settings.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.vscode/settings.json b/.vscode/settings.json index d5dba732..05a719fa 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,7 @@ "editor.trimAutoWhitespace": true, // File associations "files.associations": { + ".swift-format": "json", "*.swift": "swift" }, // Format on save for Swift files From 3da0c9ceee08fee91d3201128719ee333dc13c7b Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Wed, 10 Sep 2025 03:27:29 -0700 Subject: [PATCH 05/12] Configure swiftlang.swift-vscode as default formatter --- .vscode/settings.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 05a719fa..ea21521c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -17,7 +17,7 @@ // Format on save for Swift files "[swift]": { "editor.formatOnSave": true, - "editor.defaultFormatter": "vknabel.vscode-swiftformat" + "editor.defaultFormatter": "swiftlang.swift-vscode" }, // Exclude build artifacts from search "search.exclude": { From d2414b90b249dfde4a63ae54c9003f2f70e69b08 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 19 Sep 2025 00:39:05 -0700 Subject: [PATCH 06/12] Update format checking workflow to use swift-format --- .github/workflows/format.yml | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index bfb4dfce..2ba9a9a4 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -2,41 +2,30 @@ name: Swift Format on: pull_request: paths: - - '**.swift' + - "**.swift" workflow_dispatch: -env: - SWIFTFORMAT_VERSION: "0.54.6" - jobs: swift-format: name: Check Swift Formatting runs-on: macos-latest - + steps: - uses: actions/checkout@v4 - - - name: Install SwiftFormat - run: | - curl -L https://github.com/nicklockwood/SwiftFormat/releases/download/${{ env.SWIFTFORMAT_VERSION }}/swiftformat.zip -o swiftformat.zip - unzip swiftformat.zip - sudo mv swiftformat /usr/local/bin/ - chmod +x /usr/local/bin/swiftformat - swiftformat --version - + - name: Check formatting run: | found_issues=false files_with_issues=() - + while IFS= read -r file; do - if ! swiftformat --config .swiftformat --lint "$file"; then + if ! swift format lint --strict "$file"; then found_issues=true files_with_issues+=("$file") echo "❌ Formatting issues found in: $file" fi done < <(find . -name "*.swift" -type f) - + if [ "$found_issues" = true ]; then echo "❌ The following files need formatting:" printf '%s\n' "${files_with_issues[@]}" @@ -49,10 +38,8 @@ jobs: if: failure() run: | echo "### Here's how to fix the formatting locally:" >> $GITHUB_STEP_SUMMARY - echo '```bash' >> $GITHUB_STEP_SUMMARY - echo "# Install SwiftFormat version ${{ env.SWIFTFORMAT_VERSION }}" >> $GITHUB_STEP_SUMMARY - echo "brew install swiftformat@${{ env.SWIFTFORMAT_VERSION }}" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY + echo "```bash" >> $GITHUB_STEP_SUMMARY echo "# Format all Swift files" >> $GITHUB_STEP_SUMMARY - echo 'swiftformat --config .swiftformat .' >> $GITHUB_STEP_SUMMARY + echo 'swift format -i --recursive .' >> $GITHUB_STEP_SUMMARY echo '```' >> $GITHUB_STEP_SUMMARY From d1eed36c3df8f58e72bcb9f97d8489c2bdef6129 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 19 Sep 2025 00:47:08 -0700 Subject: [PATCH 07/12] Fix ValidateDocumentationComments linter error --- Sources/Hub/Downloader.swift | 6 ++---- Sources/Hub/HubApi.swift | 4 +--- Sources/Tokenizers/TokenLattice.swift | 3 +-- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/Sources/Hub/Downloader.swift b/Sources/Hub/Downloader.swift index 1984a4e5..e2d54c92 100644 --- a/Sources/Hub/Downloader.swift +++ b/Sources/Hub/Downloader.swift @@ -91,7 +91,6 @@ final class Downloader: NSObject, Sendable, ObservableObject { /// - authToken: Bearer token for authentication with Hugging Face /// - resumeSize: Number of bytes already downloaded for resuming interrupted downloads /// - headers: Additional HTTP headers to include in the request - /// - expectedSize: Expected file size in bytes for validation /// - timeout: Time interval before the request times out /// - numRetries: Number of retry attempts for failed downloads private func setUpDownload( @@ -193,9 +192,8 @@ final class Downloader: NSObject, Sendable, ObservableObject { /// /// - Parameters: /// - request: The URLRequest for the file to download - /// - resumeSize: The number of bytes already downloaded. If set to 0 (default), the whole file is download. If set to a positive number, the download will resume at the given position + /// - tempFile: The file handle for writing downloaded data /// - numRetries: The number of retry attempts remaining for failed downloads - /// - expectedSize: The expected size of the file to download. If set, the download will raise an error if the size of the received content is different from the expected one. /// - Throws: `DownloadError.unexpectedError` if the response is invalid or file size mismatch occurs /// `URLError` if the download fails after all retries are exhausted private func httpGet( @@ -298,7 +296,7 @@ final class Downloader: NSObject, Sendable, ObservableObject { } /// Check if an incomplete file exists for the destination and returns its size - /// - Parameter destination: The destination URL for the download + /// - Parameter incompletePath: The URL path for the incomplete file /// - Returns: Size of the incomplete file if it exists, otherwise 0 static func incompleteFileSize(at incompletePath: URL) -> Int { if FileManager.default.fileExists(atPath: incompletePath.path) { diff --git a/Sources/Hub/HubApi.swift b/Sources/Hub/HubApi.swift index 97d9d2d2..865d69c8 100644 --- a/Sources/Hub/HubApi.swift +++ b/Sources/Hub/HubApi.swift @@ -298,9 +298,7 @@ public extension HubApi { /// /// Reference: https://github.com/huggingface/huggingface_hub/blob/b2c9a148d465b43ab90fab6e4ebcbbf5a9df27d4/src/huggingface_hub/_local_folder.py#L263 /// - /// - Parameters: - /// - localDir: The local directory where metadata files are downloaded. - /// - filePath: The path of the file for which metadata is being read. + /// - Parameter metadataPath: The path of the metadata file to read. /// - Throws: An `EnvironmentError.invalidMetadataError` if the metadata file is invalid and cannot be removed. /// - Returns: A `LocalDownloadFileMetadata` object if the metadata file exists and is valid, or `nil` if the file is missing or invalid. func readDownloadMetadata(metadataPath: URL) throws -> LocalDownloadFileMetadata? { diff --git a/Sources/Tokenizers/TokenLattice.swift b/Sources/Tokenizers/TokenLattice.swift index 41abe6ca..2aec976e 100644 --- a/Sources/Tokenizers/TokenLattice.swift +++ b/Sources/Tokenizers/TokenLattice.swift @@ -98,8 +98,7 @@ extension TokenLattice { /// Returns the substring of the sentence to be tokenized associated to the specified node /// - /// - Parameters: - /// - node: The node defining the token to be extracted + /// - Parameter node: The node defining the token to be extracted /// /// - Returns: A **Substring** – i.e., a reference to the original positions, not a copy of the characters. func piece(_ node: TokenLatticeNode) -> any StringProtocol { From d664557778cbdd6af31e9612cda5faefcf9806f8 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 19 Sep 2025 00:47:56 -0700 Subject: [PATCH 08/12] Fix ReplaceForEachWithForLoop linter error --- Sources/Hub/HubApi.swift | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Sources/Hub/HubApi.swift b/Sources/Hub/HubApi.swift index 865d69c8..83c4f040 100644 --- a/Sources/Hub/HubApi.swift +++ b/Sources/Hub/HubApi.swift @@ -956,8 +956,10 @@ private final class RedirectDelegate: NSObject, URLSessionTaskDelegate, Sendable if let resolvedUrl = components.url { var newRequest = URLRequest(url: resolvedUrl) // Copy headers from original request - task.originalRequest?.allHTTPHeaderFields?.forEach { key, value in - newRequest.setValue(value, forHTTPHeaderField: key) + if let headers = task.originalRequest?.allHTTPHeaderFields { + for (key, value) in headers { + newRequest.setValue(value, forHTTPHeaderField: key) + } } newRequest.setValue(resolvedUrl.absoluteString, forHTTPHeaderField: "Location") completionHandler(newRequest) From eccdf38bd142b3b1e743972e3b7fc3e7619c12c0 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 19 Sep 2025 00:49:41 -0700 Subject: [PATCH 09/12] Fix NoBlockComments linter errors --- Sources/Tokenizers/Tokenizer.swift | 12 +++++------- Tests/TokenizersTests/TokenizerTests.swift | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Sources/Tokenizers/Tokenizer.swift b/Sources/Tokenizers/Tokenizer.swift index f20fda50..9a440754 100644 --- a/Sources/Tokenizers/Tokenizer.swift +++ b/Sources/Tokenizers/Tokenizer.swift @@ -298,8 +298,8 @@ public class PreTrainedTokenizer: Tokenizer { var addedTokens: [String: Int] = [:] var specialTokens: [String: Int] = [:] for addedToken in tokenizerData["addedTokens"].array(or: []) { - guard let id = addedToken["id"].integer() else { continue /* malformed: token with no id */ } - guard let content = addedToken.content.string() else { continue /* malformed: token with no content */ } + guard let id = addedToken["id"].integer() else { continue } // malformed: token with no id + guard let content = addedToken.content.string() else { continue } // malformed: token with no content addedTokens[content] = id if addedToken["special"].boolean(or: false) { @@ -556,11 +556,9 @@ public class PreTrainedTokenizer: Tokenizer { context["tools"] = tools } if let additionalContext { - /* - Additional keys and values to be added to the context provided to the prompt templating engine. - For example, the app could set "tools_in_user_message" to false for Llama 3.1 and 3.2 if a system message is provided. - The default value is true in the Llama 3.1 and 3.2 chat templates, but these models will perform better if the tools are included in a system message. - */ + // Additional keys and values to be added to the context provided to the prompt templating engine. + // For example, the app could set "tools_in_user_message" to false for Llama 3.1 and 3.2 if a system message is provided. + // The default value is true in the Llama 3.1 and 3.2 chat templates, but these models will perform better if the tools are included in a system message. for (key, value) in additionalContext { context[key] = value } diff --git a/Tests/TokenizersTests/TokenizerTests.swift b/Tests/TokenizersTests/TokenizerTests.swift index 44686880..2e170600 100644 --- a/Tests/TokenizersTests/TokenizerTests.swift +++ b/Tests/TokenizersTests/TokenizerTests.swift @@ -140,7 +140,7 @@ struct TokenizerTests { let tokenizer = tokenizerOpt! // These are two different characters - let cases = ["à" /* 0x61 0x300 */, "à" /* 0xe0 */ ] + let cases = ["à" /* 0x61 0x300 */, "à" /* 0xe0 */] let expected = [217138, 1305] for (s, expected) in zip(cases, expected) { let encoded = tokenizer.encode(text: " " + s) From 08d98da9dd8b7193be06f8be47a30b522bcf411c Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 19 Sep 2025 00:55:15 -0700 Subject: [PATCH 10/12] Consolidate format and unit-tests workflows into single CI workflow --- .github/workflows/ci.yml | 39 +++++++++++++++++++++++++++ .github/workflows/format.yml | 45 -------------------------------- .github/workflows/unit-tests.yml | 19 -------------- 3 files changed, 39 insertions(+), 64 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .github/workflows/format.yml delete mode 100644 .github/workflows/unit-tests.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..09a0c02a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,39 @@ +name: Unit Tests + +on: + # This runs in the context of the base of the pull request, + # rather than in the context of the merge commit. + # Maintainers must approve and we tighten permissions below. + pull_request_target: + types: [opened, synchronize, reopened] + +permissions: + contents: read + +jobs: + build-and-test: + uses: huggingface/hf-workflows/.github/workflows/swift_transformers_unit_tests.yml@main + with: + # Use the PR merge ref, not the head. + pr_number: ${{ github.event.pull_request.number }} + secrets: inherit + + lint: + name: Lint + runs-on: macos-latest + + steps: + - uses: actions/checkout@v4 + + - run: | + swift format lint --recursive . + + - name: Suggest fixes (if check fails) + if: failure() + run: | + echo "### Here's how to fix the formatting locally:" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "```bash" >> $GITHUB_STEP_SUMMARY + echo "# Format all Swift files" >> $GITHUB_STEP_SUMMARY + echo 'swift format -i --recursive .' >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml deleted file mode 100644 index 2ba9a9a4..00000000 --- a/.github/workflows/format.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: Swift Format -on: - pull_request: - paths: - - "**.swift" - workflow_dispatch: - -jobs: - swift-format: - name: Check Swift Formatting - runs-on: macos-latest - - steps: - - uses: actions/checkout@v4 - - - name: Check formatting - run: | - found_issues=false - files_with_issues=() - - while IFS= read -r file; do - if ! swift format lint --strict "$file"; then - found_issues=true - files_with_issues+=("$file") - echo "❌ Formatting issues found in: $file" - fi - done < <(find . -name "*.swift" -type f) - - if [ "$found_issues" = true ]; then - echo "❌ The following files need formatting:" - printf '%s\n' "${files_with_issues[@]}" - exit 1 - else - echo "✅ All Swift files are properly formatted!" - fi - - - name: Suggest fixes (if check fails) - if: failure() - run: | - echo "### Here's how to fix the formatting locally:" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "```bash" >> $GITHUB_STEP_SUMMARY - echo "# Format all Swift files" >> $GITHUB_STEP_SUMMARY - echo 'swift format -i --recursive .' >> $GITHUB_STEP_SUMMARY - echo '```' >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml deleted file mode 100644 index 3a049756..00000000 --- a/.github/workflows/unit-tests.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Unit Tests - -on: - # This runs in the context of the base of the pull request, - # rather than in the context of the merge commit. - # Maintainers must approve and we tighten permissions below. - pull_request_target: - types: [opened, synchronize, reopened] - -permissions: - contents: read - -jobs: - build-and-test: - uses: huggingface/hf-workflows/.github/workflows/swift_transformers_unit_tests.yml@main - with: - # Use the PR merge ref, not the head. - pr_number: ${{ github.event.pull_request.number }} - secrets: inherit From 92fe437b47db56842643eaed79126df1f8a707f4 Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Fri, 19 Sep 2025 01:21:07 -0700 Subject: [PATCH 11/12] Remove vknabel.vscode-swiftformat from recommended extensions --- .vscode/extensions.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.vscode/extensions.json b/.vscode/extensions.json index eec46520..6c8da70e 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -1,6 +1,5 @@ { "recommendations": [ "swiftlang.swift-vscode", - "vknabel.vscode-swiftformat" ] -} +} \ No newline at end of file From 66c4a66e280943b3275635988e5c9bef4796417c Mon Sep 17 00:00:00 2001 From: Mattt Zmuda Date: Tue, 23 Sep 2025 09:45:57 -0700 Subject: [PATCH 12/12] swift format -i --recursive . --- Package.swift | 6 +- Sources/Generation/GenerationConfig.swift | 2 +- Sources/Generation/Math.swift | 2 +- Sources/Hub/BOMDoubling.swift | 2 +- Sources/Hub/BinaryDistinct.swift | 8 +- Sources/Hub/Config.swift | 12 +- Sources/Hub/Hub.swift | 4 +- Sources/Hub/HubApi.swift | 31 ++--- Sources/Models/LanguageModel.swift | 4 +- Sources/Models/Weights.swift | 4 +- Sources/Tokenizers/BertTokenizer.swift | 15 ++- Sources/Tokenizers/Decoder.swift | 13 ++- Sources/Tokenizers/Normalizer.swift | 35 +++--- Sources/Tokenizers/PostProcessor.swift | 2 +- Sources/Tokenizers/Tokenizer.swift | 41 +++---- Sources/Tokenizers/UnigramTokenizer.swift | 2 +- Sources/Tokenizers/Utils.swift | 9 +- Tests/HubTests/ConfigTests.swift | 110 +++++++++--------- Tests/HubTests/DownloaderTests.swift | 23 ++-- Tests/HubTests/HubApiTests.swift | 54 ++++----- Tests/HubTests/HubTests.swift | 5 +- Tests/ModelsTests/WeightsTests.swift | 3 +- .../TokenizersTests/BertTokenizerTests.swift | 39 ++++--- Tests/TokenizersTests/ChatTemplateTests.swift | 60 +++++----- Tests/TokenizersTests/NormalizerTests.swift | 2 +- Tests/TokenizersTests/PreTokenizerTests.swift | 10 +- Tests/TokenizersTests/SplitTests.swift | 57 +++------ Tests/TokenizersTests/TrieTests.swift | 1 + 28 files changed, 270 insertions(+), 286 deletions(-) diff --git a/Package.swift b/Package.swift index 5485ddd0..55acf44e 100644 --- a/Package.swift +++ b/Package.swift @@ -5,17 +5,17 @@ import PackageDescription /// Define the strict concurrency settings to be applied to all targets. let swiftSettings: [SwiftSetting] = [ - .enableExperimentalFeature("StrictConcurrency"), + .enableExperimentalFeature("StrictConcurrency") ] let package = Package( name: "swift-transformers", platforms: [.iOS(.v16), .macOS(.v13)], products: [ - .library(name: "Transformers", targets: ["Tokenizers", "Generation", "Models"]), + .library(name: "Transformers", targets: ["Tokenizers", "Generation", "Models"]) ], dependencies: [ - .package(url: "https://github.com/johnmai-dev/Jinja", .upToNextMinor(from: "1.3.0")), + .package(url: "https://github.com/johnmai-dev/Jinja", .upToNextMinor(from: "1.3.0")) ], targets: [ .target(name: "Generation", dependencies: ["Tokenizers"]), diff --git a/Sources/Generation/GenerationConfig.swift b/Sources/Generation/GenerationConfig.swift index b68b26e9..9223648e 100644 --- a/Sources/Generation/GenerationConfig.swift +++ b/Sources/Generation/GenerationConfig.swift @@ -55,4 +55,4 @@ public extension GenerationConfig { } } -extension GenerationConfig: Decodable { } +extension GenerationConfig: Decodable {} diff --git a/Sources/Generation/Math.swift b/Sources/Generation/Math.swift index eb5aa1ab..8c1b06b9 100644 --- a/Sources/Generation/Math.swift +++ b/Sources/Generation/Math.swift @@ -19,7 +19,7 @@ import Foundation public enum Math { /** Returns the index and value of the largest element in the array. - + - Parameters: - ptr: Pointer to the first element in memory. - count: How many elements to look at. diff --git a/Sources/Hub/BOMDoubling.swift b/Sources/Hub/BOMDoubling.swift index 1e197f5d..9ed906f0 100644 --- a/Sources/Hub/BOMDoubling.swift +++ b/Sources/Hub/BOMDoubling.swift @@ -26,7 +26,7 @@ extension Data { // Check for \u{feff} BOM (observed in Gemma tokenizers), which is encoded as 0xef 0xbb 0xbf. // We may need more combinations. if b == 0x22, i + 3 < src.count, - src[i + 1] == 0xEF, src[i + 2] == 0xBB, src[i + 3] == 0xBF + src[i + 1] == 0xEF, src[i + 2] == 0xBB, src[i + 3] == 0xBF { // Duplicate BOM out.append(0xEF); out.append(0xBB); out.append(0xBF) diff --git a/Sources/Hub/BinaryDistinct.swift b/Sources/Hub/BinaryDistinct.swift index 24ff357a..ce61b237 100644 --- a/Sources/Hub/BinaryDistinct.swift +++ b/Sources/Hub/BinaryDistinct.swift @@ -193,11 +193,11 @@ public extension Dictionary where Key == BinaryDistinctString { } } -public protocol StringConvertible: ExpressibleByStringLiteral { } +public protocol StringConvertible: ExpressibleByStringLiteral {} -extension BinaryDistinctString: StringConvertible { } -extension String: StringConvertible { } -extension NSString: StringConvertible { } +extension BinaryDistinctString: StringConvertible {} +extension String: StringConvertible {} +extension NSString: StringConvertible {} public struct BinaryDistinctCharacter: Equatable, Hashable, CustomStringConvertible, ExpressibleByStringLiteral { let bytes: [UInt16] diff --git a/Sources/Hub/Config.swift b/Sources/Hub/Config.swift index f4f31470..325f7bc9 100644 --- a/Sources/Hub/Config.swift +++ b/Sources/Hub/Config.swift @@ -647,22 +647,22 @@ extension Config: Codable { let intValue = try container.decode(Int.self) self.value = .integer(intValue) return - } catch { } + } catch {} do { let floatValue = try container.decode(Float.self) self.value = .floating(floatValue) return - } catch { } + } catch {} do { let boolValue = try container.decode(Bool.self) self.value = .boolean(boolValue) return - } catch { } + } catch {} do { let stringValue = try container.decode(String.self) self.value = .string(.init(stringValue)) return - } catch { } + } catch {} } if let tupple = Self.decodeTuple(decoder) { @@ -690,7 +690,7 @@ extension Config: Codable { let intValue = try container.decode(UInt.self) let stringValue = try container.decode(String.self) return .token((intValue, .init(stringValue))) - } catch { } + } catch {} } } return nil @@ -706,7 +706,7 @@ extension Config: Codable { } return .array(elements) } - } catch { } + } catch {} return nil } diff --git a/Sources/Hub/Hub.swift b/Sources/Hub/Hub.swift index b26f934e..4f03e822 100644 --- a/Sources/Hub/Hub.swift +++ b/Sources/Hub/Hub.swift @@ -7,7 +7,7 @@ import Foundation -public struct Hub: Sendable { } +public struct Hub: Sendable {} public extension Hub { enum HubClientError: LocalizedError { @@ -204,7 +204,7 @@ public final class LanguageModelConfigurationFromHub: Sendable { // Try to load .jinja template as plain text chatTemplate = try? String(contentsOf: chatTemplateJinjaURL, encoding: .utf8) } else if FileManager.default.fileExists(atPath: chatTemplateJsonURL.path), - let chatTemplateConfig = try? hubApi.configuration(fileURL: chatTemplateJsonURL) + let chatTemplateConfig = try? hubApi.configuration(fileURL: chatTemplateJsonURL) { // Fall back to .json template chatTemplate = chatTemplateConfig.chatTemplate.string() diff --git a/Sources/Hub/HubApi.swift b/Sources/Hub/HubApi.swift index 83c4f040..7def999e 100644 --- a/Sources/Hub/HubApi.swift +++ b/Sources/Hub/HubApi.swift @@ -44,8 +44,8 @@ extension HTTPURLResponse { if trimmed.contains("rel=\"\(rel)\"") || trimmed.contains("rel=\(rel)") { if let start = trimmed.firstIndex(of: "<"), - let end = trimmed.firstIndex(of: ">"), - start < end + let end = trimmed.firstIndex(of: ">"), + start < end { let startIndex = trimmed.index(after: start) return String(trimmed[startIndex.. return file if hub.isValidHash(hash: remoteCommitHash, pattern: hub.commitHashPattern), downloaded, localMetadata != nil, - localCommitHash == remoteCommitHash + localCommitHash == remoteCommitHash { return destination } // From now on, etag, commit_hash, url and size are not empty guard let remoteCommitHash = remoteMetadata.commitHash, - let remoteEtag = remoteMetadata.etag, - let remoteSize = remoteMetadata.size, - remoteMetadata.location != "" + let remoteEtag = remoteMetadata.etag, + let remoteSize = remoteMetadata.size, + remoteMetadata.location != "" else { throw EnvironmentError.invalidMetadataError("File metadata must have been retrieved from server") } @@ -524,9 +524,9 @@ public extension HubApi { let repoDestination = localRepoLocation(repo) let repoMetadataDestination = repoDestination - .appendingPathComponent(".cache") - .appendingPathComponent("huggingface") - .appendingPathComponent("download") + .appendingPathComponent(".cache") + .appendingPathComponent("huggingface") + .appendingPathComponent("download") if await NetworkMonitor.shared.state.shouldUseOfflineMode() || useOfflineMode == true { if !FileManager.default.fileExists(atPath: repoDestination.path) { @@ -709,8 +709,9 @@ public extension HubApi { return nil } - guard var refreshRoute = response.getLinkURL(for: HFHttpHeaders.linkXetAuthKey) - ?? response.allHeaderFields[HFHttpHeaders.xetRefreshRoute] as? String + guard + var refreshRoute = response.getLinkURL(for: HFHttpHeaders.linkXetAuthKey) + ?? response.allHeaderFields[HFHttpHeaders.xetRefreshRoute] as? String else { return nil } @@ -940,13 +941,13 @@ private final class RedirectDelegate: NSObject, URLSessionTaskDelegate, Sendable if (300...399).contains(response.statusCode) { // Get the Location header if let locationString = response.value(forHTTPHeaderField: "Location"), - let locationUrl = URL(string: locationString) + let locationUrl = URL(string: locationString) { // Check if it's a relative redirect (no host component) if locationUrl.host == nil { // For relative redirects, construct the new URL using the original request's base if let originalUrl = task.originalRequest?.url, - var components = URLComponents(url: originalUrl, resolvingAgainstBaseURL: true) + var components = URLComponents(url: originalUrl, resolvingAgainstBaseURL: true) { // Update the path component with the relative path components.path = locationUrl.path diff --git a/Sources/Models/LanguageModel.swift b/Sources/Models/LanguageModel.swift index f115ab9f..40d870f5 100644 --- a/Sources/Models/LanguageModel.swift +++ b/Sources/Models/LanguageModel.swift @@ -74,7 +74,7 @@ public extension LanguageModel { public extension LanguageModel { var description: String { if let description = model.modelDescription.metadata[MLModelMetadataKey.description] as? String, - !description.isEmpty + !description.isEmpty { return description } @@ -84,7 +84,7 @@ public extension LanguageModel { /// `name_or_path` in the Python world var modelName: String { if let userFields = model.modelDescription.metadata[MLModelMetadataKey.creatorDefinedKey] as? [String: String], - let name = userFields["co.huggingface.exporters.name"] + let name = userFields["co.huggingface.exporters.name"] { return name } diff --git a/Sources/Models/Weights.swift b/Sources/Models/Weights.swift index 2dcc38cd..0fb4fff0 100644 --- a/Sources/Models/Weights.swift +++ b/Sources/Models/Weights.swift @@ -75,8 +75,8 @@ struct Safetensor { var dict = [String: MLMultiArray]() for (key, point) in header { guard let offsets = point?.dataOffsets, offsets.count == 2, - let shape = point?.shape as? [NSNumber], - let dType = try point?.dataType + let shape = point?.shape as? [NSNumber], + let dType = try point?.dataType else { continue } let strides = shape.dropFirst().reversed().reduce(into: [1]) { acc, a in diff --git a/Sources/Tokenizers/BertTokenizer.swift b/Sources/Tokenizers/BertTokenizer.swift index 7410846f..30d02563 100644 --- a/Sources/Tokenizers/BertTokenizer.swift +++ b/Sources/Tokenizers/BertTokenizer.swift @@ -63,12 +63,15 @@ public class BertTokenizer { result[element.key.string] = val } } - if let pairs = tokenizerData.addedTokens.array()?.reduce(into: [String: Int](), { result, element in - guard let val = element["id"].integer() else { return } - guard let key = element["content"].string() else { return } - - result[key] = val - }) { + if let pairs = tokenizerData.addedTokens.array()?.reduce( + into: [String: Int](), + { result, element in + guard let val = element["id"].integer() else { return } + guard let key = element["content"].string() else { return } + + result[key] = val + }) + { vocabulary.merge(pairs, uniquingKeysWith: { $1 }) } diff --git a/Sources/Tokenizers/Decoder.swift b/Sources/Tokenizers/Decoder.swift index 13cd97a4..217297d6 100644 --- a/Sources/Tokenizers/Decoder.swift +++ b/Sources/Tokenizers/Decoder.swift @@ -68,10 +68,11 @@ class WordPieceDecoder: Decoder { func decode(tokens: [String]) -> [String] { let firstToken = cleanup ? cleanUpTokenization(tokens.first!) : tokens.first! - return [firstToken] + tokens.dropFirst().map { token in - let token = token.hasPrefix(prefix) ? token.replacingCharacters(in: token.range(of: prefix)!, with: "") : " \(token)" - return cleanup ? cleanUpTokenization(token) : token - } + return [firstToken] + + tokens.dropFirst().map { token in + let token = token.hasPrefix(prefix) ? token.replacingCharacters(in: token.range(of: prefix)!, with: "") : " \(token)" + return cleanup ? cleanUpTokenization(token) : token + } } /// https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/decoders/wordpiece.rs#L40 @@ -153,7 +154,7 @@ class ReplaceDecoder: Decoder { } class ByteFallbackDecoder: Decoder { - required init(config: Config) { } + required init(config: Config) {} func decode(tokens: [String]) -> [String] { var newTokens: [String] = [] @@ -186,7 +187,7 @@ class ByteFallbackDecoder: Decoder { } class FuseDecoder: Decoder { - required init(config: Config) { } + required init(config: Config) {} func decode(tokens: [String]) -> [String] { [tokens.joined(separator: "")] diff --git a/Sources/Tokenizers/Normalizer.swift b/Sources/Tokenizers/Normalizer.swift index 46992802..c8a9c77e 100644 --- a/Sources/Tokenizers/Normalizer.swift +++ b/Sources/Tokenizers/Normalizer.swift @@ -104,7 +104,7 @@ class ReplaceNormalizer: Normalizer { } class LowercaseNormalizer: Normalizer { - required init(config: Config) { } + required init(config: Config) {} func normalize(text: String) -> String { text.lowercased() @@ -112,7 +112,7 @@ class LowercaseNormalizer: Normalizer { } class NFDNormalizer: Normalizer { - required init(config: Config) { } + required init(config: Config) {} func normalize(text: String) -> String { text.decomposedStringWithCanonicalMapping @@ -120,7 +120,7 @@ class NFDNormalizer: Normalizer { } class NFCNormalizer: Normalizer { - required init(config: Config) { } + required init(config: Config) {} func normalize(text: String) -> String { text.precomposedStringWithCanonicalMapping @@ -128,7 +128,7 @@ class NFCNormalizer: Normalizer { } class NFKDNormalizer: Normalizer { - required init(config: Config) { } + required init(config: Config) {} func normalize(text: String) -> String { text.decomposedStringWithCompatibilityMapping @@ -136,7 +136,7 @@ class NFKDNormalizer: Normalizer { } class NFKCNormalizer: Normalizer { - required init(config: Config) { } + required init(config: Config) {} func normalize(text: String) -> String { text.precomposedStringWithCompatibilityMapping @@ -177,9 +177,9 @@ class BertNormalizer: Normalizer { private func cleanText(text: String) -> String { text.map { c in guard let scalar = c.unicodeScalars.first, - scalar.value != 0x0, - scalar.value != 0xFFFD, - !isControl(scalar) + scalar.value != 0x0, + scalar.value != 0xFFFD, + !isControl(scalar) else { return "\(c)" } // Replace whitespace: \t, \n, \r @@ -220,15 +220,16 @@ class BertNormalizer: Normalizer { private func stripAccents(text: String) -> String { // This might be the same as `text.folding(options: .diacriticInsensitive, locale: nil)` - String(text.decomposedStringWithCanonicalMapping.unicodeScalars.filter { scalar in - !(scalar.value >= 0x0300 && scalar.value <= 0x036F) - }) + String( + text.decomposedStringWithCanonicalMapping.unicodeScalars.filter { scalar in + !(scalar.value >= 0x0300 && scalar.value <= 0x036F) + }) } } class PrecompiledNormalizer: Normalizer { // TODO: use `precompiledCharsmap` (base64-encoded string) from the configuration - required init(config: Config) { } + required init(config: Config) {} func normalize(text: String) -> String { // TODO: This is a simplified implementation. @@ -245,7 +246,7 @@ class PrecompiledNormalizer: Normalizer { // Non-printing control characters output.append("") case 0x0009, 0x000A, 0x000C, 0x000D, 0x1680, 0x200B...0x200F, 0x2028, 0x2029, 0x2581, - 0xFEFF, 0xFFFD: + 0xFEFF, 0xFFFD: // Separators output.append(" ") case 0xFF5E: @@ -259,9 +260,9 @@ class PrecompiledNormalizer: Normalizer { if hasFullwidthTilde { return output - .split(by: "\u{FF5E}") - .map { $0.precomposedStringWithCompatibilityMapping } - .joined(separator: "\u{FF5E}") + .split(by: "\u{FF5E}") + .map { $0.precomposedStringWithCompatibilityMapping } + .joined(separator: "\u{FF5E}") } else { return output.precomposedStringWithCompatibilityMapping } @@ -269,7 +270,7 @@ class PrecompiledNormalizer: Normalizer { } class StripAccentsNormalizer: Normalizer { - required init(config: Config) { } + required init(config: Config) {} func normalize(text: String) -> String { text.precomposedStringWithCompatibilityMapping diff --git a/Sources/Tokenizers/PostProcessor.swift b/Sources/Tokenizers/PostProcessor.swift index 269a73d6..209a57da 100644 --- a/Sources/Tokenizers/PostProcessor.swift +++ b/Sources/Tokenizers/PostProcessor.swift @@ -77,7 +77,7 @@ class TemplateProcessing: PostProcessor { } class ByteLevelPostProcessor: PostProcessor { - required init(config: Config) { } + required init(config: Config) {} func postProcess(tokens: [String], tokensPair: [String]? = nil, addSpecialTokens: Bool = true) -> [String] { tokens } } diff --git a/Sources/Tokenizers/Tokenizer.swift b/Sources/Tokenizers/Tokenizer.swift index 9a440754..72afb9ce 100644 --- a/Sources/Tokenizers/Tokenizer.swift +++ b/Sources/Tokenizers/Tokenizer.swift @@ -375,16 +375,16 @@ public class PreTrainedTokenizer: Tokenizer { return text - .replacingOccurrences(of: " .", with: ".") - .replacingOccurrences(of: " ?", with: "?") - .replacingOccurrences(of: " !", with: "!") - .replacingOccurrences(of: " ,", with: ",") - .replacingOccurrences(of: " ' ", with: "'") - .replacingOccurrences(of: " n't", with: "n't") - .replacingOccurrences(of: " 'm", with: "'m") - .replacingOccurrences(of: " 's", with: "'s") - .replacingOccurrences(of: " 've", with: "'ve") - .replacingOccurrences(of: " 're", with: "'re") + .replacingOccurrences(of: " .", with: ".") + .replacingOccurrences(of: " ?", with: "?") + .replacingOccurrences(of: " !", with: "!") + .replacingOccurrences(of: " ,", with: ",") + .replacingOccurrences(of: " ' ", with: "'") + .replacingOccurrences(of: " n't", with: "n't") + .replacingOccurrences(of: " 'm", with: "'m") + .replacingOccurrences(of: " 's", with: "'s") + .replacingOccurrences(of: " 've", with: "'ve") + .replacingOccurrences(of: " 're", with: "'re") } func fuseUnknown(_ tokens: [String]) -> [String] { @@ -404,11 +404,12 @@ public class PreTrainedTokenizer: Tokenizer { public func tokenize(text: String) -> [String] { // Take care of special tokens first - let sections: [String] = if let regex = addedTokensRegex { - text.split(by: regex) - } else { - [text] - } + let sections: [String] = + if let regex = addedTokensRegex { + text.split(by: regex) + } else { + [text] + } return sections.enumerated().map { section, x in if addedTokens.contains(x) { return [x] } return preTokenize(normalize(x), options: section == 0 ? [.firstSection] : []).flatMap { model($0) } @@ -431,8 +432,8 @@ public class PreTrainedTokenizer: Tokenizer { let specialTokenIDs = Set(specialTokens.values) tokenStrings = tokens - .filter { !specialTokenIDs.contains($0) } - .compactMap { model.convertIdToToken($0) } + .filter { !specialTokenIDs.contains($0) } + .compactMap { model.convertIdToToken($0) } } else { tokenStrings = tokens.compactMap { model.convertIdToToken($0) } } @@ -594,13 +595,13 @@ public class PreTrainedTokenizer: Tokenizer { // MARK: - Building -public struct AutoTokenizer { } +public struct AutoTokenizer {} struct PreTrainedTokenizerClasses { /// Class overrides for custom behaviour /// Not to be confused with the TokenizerModel classes defined in TokenizerModel static let tokenizerClasses: [String: PreTrainedTokenizer.Type] = [ - "LlamaTokenizer": LlamaPreTrainedTokenizer.self, + "LlamaTokenizer": LlamaPreTrainedTokenizer.self ] } @@ -651,7 +652,7 @@ public extension AutoTokenizer { // MARK: - Tokenizer model classes -class T5Tokenizer: UnigramTokenizer { } +class T5Tokenizer: UnigramTokenizer {} // MARK: - PreTrainedTokenizer classes diff --git a/Sources/Tokenizers/UnigramTokenizer.swift b/Sources/Tokenizers/UnigramTokenizer.swift index 8eb57815..d2436250 100644 --- a/Sources/Tokenizers/UnigramTokenizer.swift +++ b/Sources/Tokenizers/UnigramTokenizer.swift @@ -45,7 +45,7 @@ class UnigramTokenizer: PreTrainedTokenizerModel { let tuple = piece.array(or: []) guard let token = tuple.first?.string(), - let scoreValue = tuple.last + let scoreValue = tuple.last else { throw TokenizerError.malformedVocab } diff --git a/Sources/Tokenizers/Utils.swift b/Sources/Tokenizers/Utils.swift index 4398c9bf..becc8df3 100644 --- a/Sources/Tokenizers/Utils.swift +++ b/Sources/Tokenizers/Utils.swift @@ -66,14 +66,7 @@ struct Utils { /// Checks if a character is considered Chinese /// https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) static func isChineseChar(_ c: UnicodeScalar) -> Bool { - (c.value >= 0x4E00 && c.value <= 0x9FFF) || - (c.value >= 0x3400 && c.value <= 0x4DBF) || - (c.value >= 0x20000 && c.value <= 0x2A6DF) || - (c.value >= 0x2A700 && c.value <= 0x2B73F) || - (c.value >= 0x2B740 && c.value <= 0x2B81F) || - (c.value >= 0x2B820 && c.value <= 0x2CEAF) || - (c.value >= 0xF900 && c.value <= 0xFAFF) || - (c.value >= 0x2F800 && c.value <= 0x2FA1F) + (c.value >= 0x4E00 && c.value <= 0x9FFF) || (c.value >= 0x3400 && c.value <= 0x4DBF) || (c.value >= 0x20000 && c.value <= 0x2A6DF) || (c.value >= 0x2A700 && c.value <= 0x2B73F) || (c.value >= 0x2B740 && c.value <= 0x2B81F) || (c.value >= 0x2B820 && c.value <= 0x2CEAF) || (c.value >= 0xF900 && c.value <= 0xFAFF) || (c.value >= 0x2F800 && c.value <= 0x2FA1F) } } diff --git a/Tests/HubTests/ConfigTests.swift b/Tests/HubTests/ConfigTests.swift index d0208018..8a313381 100644 --- a/Tests/HubTests/ConfigTests.swift +++ b/Tests/HubTests/ConfigTests.swift @@ -308,18 +308,18 @@ struct ConfigTests { @Test("JSON decoding supports UTF-8, UTF-16LE, and UTF-16BE") func textEncoding_utf16Variants() throws { let json = """ - { - "a": ["val_1", "val_2"], - "b": 2, - "c": [[10, "tkn_1"], [12, "tkn_2"], [4, "tkn_3"]], - "d": false, - "e": { - "e_1": 1.1, - "e_2": [1, 2, 3] - }, - "f": null - } - """ + { + "a": ["val_1", "val_2"], + "b": 2, + "c": [[10, "tkn_1"], [12, "tkn_2"], [4, "tkn_3"]], + "d": false, + "e": { + "e_1": 1.1, + "e_2": [1, 2, 3] + }, + "f": null + } + """ let urlUTF8 = try createFile(with: json, encoding: .utf8, fileName: "config_utf8.json") let urlUTF16LE = try createFile(with: json, encoding: .utf16LittleEndian, fileName: "config_utf16_le.json") @@ -390,52 +390,52 @@ struct ConfigTests { "null": Config(), ]) let template = """ - {{ config["dict_of_floats"]["key1"] }} - {{ config["dict_of_tokens"]["key6"]["12"] }} - {{ config["arr_of_ints"][0] }} - {{ config["arr_of_ints"][1] }} - {{ config["arr_of_ints"][2] }} - {{ config["arr_of_floats"][0] }} - {{ config["arr_of_floats"][1] }} - {{ config["arr_of_strings"][0] }} - {{ config["arr_of_strings"][1] }} - {{ config["arr_of_bools"][0] }} - {{ config["arr_of_bools"][1] }} - {{ config["arr_of_dicts"][0]["key7"] }} - {{ config["arr_of_dicts"][1]["key8"] }} - {{ config["arr_of_tokens"][0]["1"] }} - {{ config["arr_of_tokens"][1]["2"] }} - {{ config["int"] }} - {{ config["float"] }} - {{ config["string"] }} - {{ config["bool"] }} - {{ config["token"]["1"] }} - """ + {{ config["dict_of_floats"]["key1"] }} + {{ config["dict_of_tokens"]["key6"]["12"] }} + {{ config["arr_of_ints"][0] }} + {{ config["arr_of_ints"][1] }} + {{ config["arr_of_ints"][2] }} + {{ config["arr_of_floats"][0] }} + {{ config["arr_of_floats"][1] }} + {{ config["arr_of_strings"][0] }} + {{ config["arr_of_strings"][1] }} + {{ config["arr_of_bools"][0] }} + {{ config["arr_of_bools"][1] }} + {{ config["arr_of_dicts"][0]["key7"] }} + {{ config["arr_of_dicts"][1]["key8"] }} + {{ config["arr_of_tokens"][0]["1"] }} + {{ config["arr_of_tokens"][1]["2"] }} + {{ config["int"] }} + {{ config["float"] }} + {{ config["string"] }} + {{ config["bool"] }} + {{ config["token"]["1"] }} + """ let exp = """ - 1.1 - dfe - 1 - 2 - 3 - 1.1 - 1.2 - tre - jeq - true - false - 1.1 - 1.2 - ghz - pkr - 678 - 1.1 - hha - true - iop - """ + 1.1 + dfe + 1 + 2 + 3 + 1.1 + 1.2 + tre + jeq + true + false + 1.1 + 1.2 + ghz + pkr + 678 + 1.1 + hha + true + iop + """ let got = try Template(template).render([ - "config": cfg.toJinjaCompatible(), + "config": cfg.toJinjaCompatible() ]) #expect(got == exp) diff --git a/Tests/HubTests/DownloaderTests.swift b/Tests/HubTests/DownloaderTests.swift index f79ae5da..aecfa778 100644 --- a/Tests/HubTests/DownloaderTests.swift +++ b/Tests/HubTests/DownloaderTests.swift @@ -9,7 +9,6 @@ import Combine import XCTest @testable import Hub -import XCTest final class DownloaderTests: XCTestCase { var tempDir: URL! @@ -34,18 +33,18 @@ final class DownloaderTests: XCTestCase { let etag = try await Hub.getFileMetadata(fileURL: url).etag! let destination = tempDir.appendingPathComponent("config.json") let fileContent = """ - { - "architectures": [ - "LlamaForCausalLM" - ], - "bos_token_id": 1, - "eos_token_id": 2, - "model_type": "llama", - "pad_token_id": 0, - "vocab_size": 32000 - } + { + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "model_type": "llama", + "pad_token_id": 0, + "vocab_size": 32000 + } - """ + """ let cacheDir = tempDir.appendingPathComponent("cache") try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true) diff --git a/Tests/HubTests/HubApiTests.swift b/Tests/HubTests/HubApiTests.swift index 5c89e9b2..aee71a6d 100644 --- a/Tests/HubTests/HubApiTests.swift +++ b/Tests/HubTests/HubApiTests.swift @@ -159,7 +159,7 @@ class HubApiTests: XCTestCase { let url = URL( string: - "https://huggingface.co/coreml-projects/Llama-2-7b-chat-coreml/resolve/main/llama-2-7b-chat.mlpackage/Data/com.apple.CoreML/model.mlmodel" + "https://huggingface.co/coreml-projects/Llama-2-7b-chat-coreml/resolve/main/llama-2-7b-chat.mlpackage/Data/com.apple.CoreML/model.mlmodel" ) let metadata = try await Hub.getFileMetadata(fileURL: url!) @@ -181,7 +181,7 @@ class SnapshotDownloadTests: XCTestCase { return base.appending(component: "huggingface-tests") }() - override func setUp() { } + override func setUp() {} override func tearDown() { do { @@ -268,7 +268,7 @@ class SnapshotDownloadTests: XCTestCase { XCTAssertEqual( Set(downloadedFilenames), Set([ - "llama-2-7b-chat.mlpackage/Data/com.apple.CoreML/Metadata.json", + "llama-2-7b-chat.mlpackage/Data/com.apple.CoreML/Metadata.json" ]) ) } @@ -417,7 +417,7 @@ class SnapshotDownloadTests: XCTestCase { XCTAssertEqual( Set(downloadedMetadataFilenames), Set([ - ".cache/huggingface/download/tokenizer.json.metadata", + ".cache/huggingface/download/tokenizer.json.metadata" ]) ) @@ -546,7 +546,7 @@ class SnapshotDownloadTests: XCTestCase { XCTAssertEqual( Set(downloadedMetadataFilenames), Set([ - ".cache/huggingface/download/llama-2-7b-chat.mlpackage/Data/com.apple.CoreML/model.mlmodel.metadata", + ".cache/huggingface/download/llama-2-7b-chat.mlpackage/Data/com.apple.CoreML/model.mlmodel.metadata" ]) ) @@ -1011,17 +1011,17 @@ class SnapshotDownloadTests: XCTestCase { let fileContents = try String(contentsOfFile: downloadedTo.appendingPathComponent("config.json").path) let expected = """ - { - "architectures": [ - "LlamaForCausalLM" - ], - "bos_token_id": 1, - "eos_token_id": 2, - "model_type": "llama", - "pad_token_id": 0, - "vocab_size": 32000 - } - """ + { + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "model_type": "llama", + "pad_token_id": 0, + "vocab_size": 32000 + } + """ XCTAssertTrue(fileContents.contains(expected)) } @@ -1050,17 +1050,17 @@ class SnapshotDownloadTests: XCTestCase { let fileContents = try String(contentsOfFile: downloadedTo.appendingPathComponent("config.json").path) let expected = """ - X - "architectures": [ - "LlamaForCausalLM" - ], - "bos_token_id": 1, - "eos_token_id": 2, - "model_type": "llama", - "pad_token_id": 0, - "vocab_size": 32000 - } - """ + X + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "model_type": "llama", + "pad_token_id": 0, + "vocab_size": 32000 + } + """ XCTAssertTrue(fileContents.contains(expected)) } diff --git a/Tests/HubTests/HubTests.swift b/Tests/HubTests/HubTests.swift index 1fcb5a4c..92458d51 100644 --- a/Tests/HubTests/HubTests.swift +++ b/Tests/HubTests/HubTests.swift @@ -4,16 +4,17 @@ // Created by Pedro Cuenca on 18/05/2023. // -@testable import Hub import XCTest +@testable import Hub + class HubTests: XCTestCase { let downloadDestination: URL = { let base = FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first! return base.appending(component: "huggingface-tests") }() - override func setUp() { } + override func setUp() {} override func tearDown() { do { diff --git a/Tests/ModelsTests/WeightsTests.swift b/Tests/ModelsTests/WeightsTests.swift index 55bac426..c9071256 100644 --- a/Tests/ModelsTests/WeightsTests.swift +++ b/Tests/ModelsTests/WeightsTests.swift @@ -1,7 +1,8 @@ import Foundation +import Testing + @testable import Hub @testable import Models -import Testing @Suite("Weights Tests") struct WeightsTests { diff --git a/Tests/TokenizersTests/BertTokenizerTests.swift b/Tests/TokenizersTests/BertTokenizerTests.swift index 5686c0ab..7cad3e34 100644 --- a/Tests/TokenizersTests/BertTokenizerTests.swift +++ b/Tests/TokenizersTests/BertTokenizerTests.swift @@ -7,8 +7,9 @@ // import Foundation -@testable import Hub import Testing + +@testable import Hub @testable import Tokenizers /// Stanford Question Answering Dataset (SQuAD) @@ -219,27 +220,27 @@ struct BertTokenizerTests { @Test("BERT encoder/decoder round-trip") func encoderDecoder() { let text = """ - Wake up (Wake up) - Grab a brush and put a little makeup - Hide your scars to fade away the shakeup (Hide the scars to fade away the shakeup) - Why'd you leave the keys upon the table? - Here you go, create another fable, you wanted to - Grab a brush and put a little makeup, you wanted to - Hide the scars to fade away the shakeup, you wanted to - Why'd you leave the keys upon the table? You wanted to - """ + Wake up (Wake up) + Grab a brush and put a little makeup + Hide your scars to fade away the shakeup (Hide the scars to fade away the shakeup) + Why'd you leave the keys upon the table? + Here you go, create another fable, you wanted to + Grab a brush and put a little makeup, you wanted to + Hide the scars to fade away the shakeup, you wanted to + Why'd you leave the keys upon the table? You wanted to + """ // Not sure if there's a way to achieve a non-destructive round-trip let decoded = """ - wake up ( wake up ) - grab a brush and put a little makeup - hide your scars to fade away the shakeup ( hide the scars to fade away the shakeup ) - why \' d you leave the keys upon the table ? - here you go , create another fable , you wanted to - grab a brush and put a little makeup , you wanted to - hide the scars to fade away the shakeup , you wanted to - why \' d you leave the keys upon the table ? you wanted to - """ + wake up ( wake up ) + grab a brush and put a little makeup + hide your scars to fade away the shakeup ( hide the scars to fade away the shakeup ) + why \' d you leave the keys upon the table ? + here you go , create another fable , you wanted to + grab a brush and put a little makeup , you wanted to + hide the scars to fade away the shakeup , you wanted to + why \' d you leave the keys upon the table ? you wanted to + """ let tokenizer = bertTokenizer for (line, expected) in zip(text.split(separator: "\n"), decoded.split(separator: "\n")) { diff --git a/Tests/TokenizersTests/ChatTemplateTests.swift b/Tests/TokenizersTests/ChatTemplateTests.swift index afba4402..15d74c4f 100644 --- a/Tests/TokenizersTests/ChatTemplateTests.swift +++ b/Tests/TokenizersTests/ChatTemplateTests.swift @@ -15,7 +15,7 @@ struct ChatTemplateTests { [ "role": "user", "content": "Describe the Swift programming language.", - ], + ] ] static let phiTokenizerTask = Task { @@ -135,7 +135,7 @@ struct ChatTemplateTests { [ "role": "user", "content": "🥳🥳🥳", - ], + ] ] let encoded = try tokenizer.applyChatTemplate(messages: testMessages) @@ -170,7 +170,7 @@ struct ChatTemplateTests { [ "role": "user", "content": "What is the weather in Paris today?", - ], + ] ] let getCurrentWeatherToolSpec: [String: Any] = [ @@ -203,7 +203,7 @@ struct ChatTemplateTests { func assertDictsAreEqual(_ actual: [String: Any], _ expected: [String: Any]) { for (key, value) in actual { if let nestedDict = value as? [String: Any], - let nestedDict2 = expected[key] as? [String: Any] + let nestedDict2 = expected[key] as? [String: Any] { assertDictsAreEqual(nestedDict, nestedDict2) } else if let arrayValue = value as? [String] { @@ -217,9 +217,9 @@ struct ChatTemplateTests { } if let startRange = decoded.range(of: "\n"), - let endRange = decoded.range( - of: "\n", range: startRange.upperBound..", range: startRange.upperBound..system - You are Qwen, created by Alibaba Cloud. You are a helpful assistant. + <|im_start|>system + You are Qwen, created by Alibaba Cloud. You are a helpful assistant. - # Tools + # Tools - You may call one or more functions to assist with the user query. + You may call one or more functions to assist with the user query. - You are provided with function signatures within XML tags: - - """ + You are provided with function signatures within XML tags: + + """ let expectedPromptEnd = """ - + - For each function call, return a json object with function name and arguments within XML tags: - - {"name": , "arguments": } - <|im_end|> - <|im_start|>user - What is the weather in Paris today?<|im_end|> - <|im_start|>assistant + For each function call, return a json object with function name and arguments within XML tags: + + {"name": , "arguments": } + <|im_end|> + <|im_start|>user + What is the weather in Paris today?<|im_end|> + <|im_start|>assistant - """ + """ #expect( decoded.hasPrefix(expectedPromptStart), @@ -282,7 +282,7 @@ struct ChatTemplateTests { "image_url": "example.jpg", ] as [String: String], ] as [[String: String]], - ] as [String: Any], + ] as [String: Any] ] as [[String: Any]] // Qwen 2 VL does not have a chat_template.json file. The chat template is in tokenizer_config.json. let qwen2VLTokenizer = try await AutoTokenizer.from( @@ -295,13 +295,13 @@ struct ChatTemplateTests { let qwen2_5VLEncoded = try qwen2_5VLTokenizer.applyChatTemplate(messages: visionMessages) let qwen2_5VLDecoded = qwen2_5VLTokenizer.decode(tokens: qwen2_5VLEncoded) let expectedOutput = """ - <|im_start|>system - You are a helpful assistant.<|im_end|> - <|im_start|>user - What's in this image?<|vision_start|><|image_pad|><|vision_end|><|im_end|> - <|im_start|>assistant + <|im_start|>system + You are a helpful assistant.<|im_end|> + <|im_start|>user + What's in this image?<|vision_start|><|image_pad|><|vision_end|><|im_end|> + <|im_start|>assistant - """ + """ #expect(qwen2VLEncoded == qwen2_5VLEncoded, "Encoded sequences should be equal") #expect(qwen2VLDecoded == qwen2_5VLDecoded, "Decoded sequences should be equal") #expect(qwen2_5VLDecoded == expectedOutput, "Decoded sequence should match expected output") diff --git a/Tests/TokenizersTests/NormalizerTests.swift b/Tests/TokenizersTests/NormalizerTests.swift index 85a947ce..eb29a211 100644 --- a/Tests/TokenizersTests/NormalizerTests.swift +++ b/Tests/TokenizersTests/NormalizerTests.swift @@ -129,7 +129,7 @@ struct NormalizerTests { @Test("Strip accents functionality") func stripAccents() { let testCases = [ - ("département", "departement"), + ("département", "departement") ] // TODO: test combinations with/without lowercase diff --git a/Tests/TokenizersTests/PreTokenizerTests.swift b/Tests/TokenizersTests/PreTokenizerTests.swift index 6e2b8842..b0cd4ba6 100644 --- a/Tests/TokenizersTests/PreTokenizerTests.swift +++ b/Tests/TokenizersTests/PreTokenizerTests.swift @@ -92,12 +92,12 @@ struct PreTokenizerTests { ) #expect( preTokenizer3.preTokenize(text: "Hey friend! How are you?!?") == [ - "HeyĠfriend!ĠĠĠĠĠHowĠareĠyou?!?", + "HeyĠfriend!ĠĠĠĠĠHowĠareĠyou?!?" ] ) #expect( preTokenizer3.preTokenize(text: " Hey, friend, what's up? ") == [ - "ĠĠĠHey,ĠĠĠĠfriend,ĠĠĠĠwhat'sĠup?ĠĠ", + "ĠĠĠHey,ĠĠĠĠfriend,ĠĠĠĠwhat'sĠup?ĠĠ" ] ) } @@ -159,7 +159,7 @@ struct PreTokenizerTests { config: Config([ "pattern": [ "Regex": - "(?i:\'s|\'t|\'re|\'ve|\'m|\'ll|\'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", + "(?i:\'s|\'t|\'re|\'ve|\'m|\'ll|\'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" ], "invert": true, ])) #expect( @@ -191,8 +191,8 @@ struct PreTokenizerTests { let text = "Hey my friend how▁are you" let tokens = text - .split(by: "", includeSeparators: true) - .flatMap { preTokenizer.preTokenize(text: $0) } + .split(by: "", includeSeparators: true) + .flatMap { preTokenizer.preTokenize(text: $0) } #expect( tokens == ["▁Hey", "▁my", "▁friend", "▁", "▁", "▁how", "▁are", "▁you"] diff --git a/Tests/TokenizersTests/SplitTests.swift b/Tests/TokenizersTests/SplitTests.swift index db2d16d1..5695b683 100644 --- a/Tests/TokenizersTests/SplitTests.swift +++ b/Tests/TokenizersTests/SplitTests.swift @@ -17,107 +17,88 @@ struct SplitTests { let captureRegex = try! NSRegularExpression(pattern: addedTokensRegexp, options: []) #expect( - "eating <|raw|> meat <|end|> That's all".split(by: captureRegex) == - ["eating ", "<|raw|>", "meat ", "<|end|>", "That's all"] + "eating <|raw|> meat <|end|> That's all".split(by: captureRegex) == ["eating ", "<|raw|>", "meat ", "<|end|>", "That's all"] ) #expect( - "<|raw|>".split(by: captureRegex) == - ["<|raw|>"] + "<|raw|>".split(by: captureRegex) == ["<|raw|>"] ) #expect( - "This string doesn't have those separators".split(by: captureRegex) == - ["This string doesn't have those separators"] + "This string doesn't have those separators".split(by: captureRegex) == ["This string doesn't have those separators"] ) #expect( - "start <|end|>".split(by: captureRegex) == - ["start ", "<|end|>"] + "start <|end|>".split(by: captureRegex) == ["start ", "<|end|>"] ) #expect( - "start <|end|> ".split(by: captureRegex) == - ["start ", "<|end|>"] + "start <|end|> ".split(by: captureRegex) == ["start ", "<|end|>"] ) #expect( - "start <|end|> ".split(by: captureRegex) == - ["start ", "<|end|>"] + "start <|end|> ".split(by: captureRegex) == ["start ", "<|end|>"] ) #expect( - "start <|end|> for real".split(by: captureRegex) == - ["start ", "<|end|>", "for real"] + "start <|end|> for real".split(by: captureRegex) == ["start ", "<|end|>", "for real"] ) #expect( - "<|raw|><|end|>".split(by: captureRegex) == - ["<|raw|>", "<|end|>"] + "<|raw|><|end|>".split(by: captureRegex) == ["<|raw|>", "<|end|>"] ) } @Test("Split behavior merged with previous") func splitBehaviorMergedWithPrevious() { #expect( - "the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == - ["the-", "final-", "-", "countdown"] + "the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == ["the-", "final-", "-", "countdown"] ) #expect( - "the-final--countdown-".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == - ["the-", "final-", "-", "countdown-"] + "the-final--countdown-".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == ["the-", "final-", "-", "countdown-"] ) #expect( - "the-final--countdown--".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == - ["the-", "final-", "-", "countdown-", "-"] + "the-final--countdown--".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == ["the-", "final-", "-", "countdown-", "-"] ) #expect( - "-the-final--countdown--".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == - ["-", "the-", "final-", "-", "countdown-", "-"] + "-the-final--countdown--".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == ["-", "the-", "final-", "-", "countdown-", "-"] ) #expect( - "--the-final--countdown--".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == - ["-", "-", "the-", "final-", "-", "countdown-", "-"] + "--the-final--countdown--".split(by: "-", options: .caseInsensitive, behavior: .mergedWithPrevious) == ["-", "-", "the-", "final-", "-", "countdown-", "-"] ) } @Test("Split behavior merged with next") func splitBehaviorMergedWithNext() { #expect( - "the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .mergedWithNext) == - ["the", "-final", "-", "-countdown"] + "the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .mergedWithNext) == ["the", "-final", "-", "-countdown"] ) #expect( - "-the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .mergedWithNext) == - ["-the", "-final", "-", "-countdown"] + "-the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .mergedWithNext) == ["-the", "-final", "-", "-countdown"] ) #expect( - "--the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .mergedWithNext) == - ["-", "-the", "-final", "-", "-countdown"] + "--the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .mergedWithNext) == ["-", "-the", "-final", "-", "-countdown"] ) #expect( - "--the-final--countdown-".split(by: "-", options: .caseInsensitive, behavior: .mergedWithNext) == - ["-", "-the", "-final", "-", "-countdown", "-"] + "--the-final--countdown-".split(by: "-", options: .caseInsensitive, behavior: .mergedWithNext) == ["-", "-the", "-final", "-", "-countdown", "-"] ) } @Test("Split behavior isolated and removed") func splitBehaviorOther() { #expect( - "the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .isolated) == - ["the", "-", "final", "-", "-", "countdown"] + "the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .isolated) == ["the", "-", "final", "-", "-", "countdown"] ) #expect( - "the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .removed) == - ["the", "final", "countdown"] + "the-final--countdown".split(by: "-", options: .caseInsensitive, behavior: .removed) == ["the", "final", "countdown"] ) } } diff --git a/Tests/TokenizersTests/TrieTests.swift b/Tests/TokenizersTests/TrieTests.swift index b5aae981..c720b191 100644 --- a/Tests/TokenizersTests/TrieTests.swift +++ b/Tests/TokenizersTests/TrieTests.swift @@ -7,6 +7,7 @@ import Foundation import Testing + @testable import Tokenizers @Suite("Trie data structure functionality")