/**
 * Splits a comma-separated string into trimmed fields, honouring double-quoted sections.
 *
 * This is a small subset of CSV parsing:
 * - A comma outside quotes ends the current field.
 * - Double quotes delimit a quoted section. The quote characters are dropped and
 *   commas inside the section are kept as ordinary text.
 * - Inside a quoted section, two consecutive double quotes (`""`) produce one
 *   literal `"` in the field.
 * - Every field is trimmed of surrounding whitespace (this includes whitespace
 *   that was inside quotes).
 * - Once at least one separator has been seen, the final field is always emitted,
 *   even when it is empty: `'a,'`, `'a, '` and `'a,""'` all yield `['a', '']`.
 *   Input with no separator whose only field is empty yields `[]`.
 *
 * @example
 * splitWords('"apple, banana", cherry'); // ['apple, banana', 'cherry']
 * splitWords('"say ""hi""",bye');        // ['say "hi"', 'bye']
 * splitWords('apple,');                  // ['apple', '']
 *
 * @param candidate - The input string to split
 * @returns The fields in input order
 * @throws {Error} When a quoted section is opened but never closed
 */
export function splitWords(candidate: string): string[] {
  // Empty (or otherwise falsy) input has no fields at all.
  if (!candidate) return [];

  const fields: string[] = [];
  let current = '';
  let inQuotes = false; // true while we are between an opening and a closing quote

  for (let i = 0; i < candidate.length; i++) {
    const char = candidate[i];

    if (char === '"') {
      if (inQuotes && candidate[i + 1] === '"') {
        // Escaped quote inside a quoted section: keep one literal quote and
        // skip the second character of the pair.
        current += '"';
        i++;
      } else {
        // Opening or closing quote: toggles state, never part of the field.
        inQuotes = !inQuotes;
      }
      continue;
    }

    if (char === ',' && !inQuotes) {
      // Separator outside quotes: finish the current field.
      fields.push(current.trim());
      current = '';
      continue;
    }

    current += char;
  }

  if (inQuotes) {
    throw new Error('Mismatched quotes in input string');
  }

  // The loop only emits a field when it meets a separator, so the last field is
  // still pending here. It is emitted when it has content, or when a separator
  // was seen earlier (a separator always implies a field after it, possibly
  // empty). Checking `fields.length` instead of the raw input's last character
  // keeps 'a,', 'a, ' and 'a,""' consistent with each other.
  const last = current.trim();
  if (last !== '' || fields.length > 0) {
    fields.push(last);
  }

  return fields;
}
/**
 * Joins an array of words into a single comma-separated string.
 *
 * A word is wrapped in double quotes when it contains a comma or a double
 * quote, so that a comma inside a word is not read as a field separator when
 * the string is parsed again. Double quotes inside such a word are doubled
 * (`"` becomes `""`), which is the standard CSV escape and keeps the quoted
 * section balanced. All other words, including empty strings, are emitted
 * unchanged.
 *
 * @example
 * joinWords(['apple, banana', 'cherry']); // '"apple, banana",cherry'
 * joinWords(['say "hi"', 'bye']);         // '"say ""hi""",bye'
 * joinWords(['apple', '']);               // 'apple,'
 *
 * @param words - Array of words/phrases to join
 * @returns The joined string, or `''` when `words` is empty or missing
 */
export function joinWords(words: string[]): string {
  // Tolerates null/undefined from untyped callers as well as an empty array.
  if (!words || words.length === 0) return '';

  return words
    .map(word => {
      // Only commas and quotes are significant to the field syntax.
      if (!/[",]/.test(word)) {
        return word;
      }
      // Double every embedded quote before wrapping, otherwise an inner quote
      // would close the quoted section early and expose any comma after it.
      return `"${word.replace(/"/g, '""')}"`;
    })
    .join(',');
}
// Behavioural tests for splitWords: plain lists, quoted phrases, error and
// trailing-delimiter handling.
describe('#splitWords', () => {
  test('returns an array of words for input "apple,banana,cherry"', () => {
    const words = splitWords('apple,banana,cherry');
    expect(words).toEqual(['apple', 'banana', 'cherry']);
  });

  test('returns an empty array for input ""', () => {
    const words = splitWords('');
    expect(words).toEqual([]);
  });

  test('returns an array with a single word for input "apple"', () => {
    const words = splitWords('apple');
    expect(words).toEqual(['apple']);
  });

  test('allows phrases without double quotes', () => {
    const words = splitWords('apple banana, cherry');
    expect(words).toEqual(['apple banana', 'cherry']);
  });

  test('allows phrases with double quotes', () => {
    const words = splitWords('"apple banana", cherry');
    expect(words).toEqual(['apple banana', 'cherry']);
  });

  test('allows phrases with double quotes and commas', () => {
    const words = splitWords('"apple, banana", cherry');
    expect(words).toEqual(['apple, banana', 'cherry']);
  });

  test('throws error for mismatched quotes', () => {
    const splitUnbalanced = () => splitWords('"apple, banana, cherry');
    expect(splitUnbalanced).toThrow('Mismatched quotes in input string');
  });

  test('preserves trailing empty field when input ends with a delimiter', () => {
    // Each pair is [input, expected fields]; checked in the listed order.
    const cases: Array<[string, string[]]> = [
      ['apple,banana,', ['apple', 'banana', '']],
      ['apple,', ['apple', '']],
      [',', ['', '']],
    ];
    for (const [input, expected] of cases) {
      expect(splitWords(input)).toEqual(expected);
    }
  });
});

// Behavioural tests for joinWords: plain joining, quoting of comma-bearing
// words, and tolerance of empty or missing input.
describe('#joinWords', () => {
  test('joins array of words with commas', () => {
    const joined = joinWords(['apple', 'banana', 'cherry']);
    expect(joined).toBe('apple,banana,cherry');
  });

  test('adds quotes around words containing commas', () => {
    const joined = joinWords(['apple, banana', 'cherry']);
    expect(joined).toBe('"apple, banana",cherry');
  });

  test('handles empty array', () => {
    const joined = joinWords([]);
    expect(joined).toBe('');
  });

  test('handles undefined or null input', () => {
    // The signature only admits string[]; these calls deliberately bypass the
    // type checker to exercise the runtime guard.
    // @ts-ignore
    expect(joinWords(undefined)).toBe('');
    // @ts-ignore
    expect(joinWords(null)).toBe('');
  });

  test('handles array with empty strings', () => {
    const twoEmpty = joinWords(['', '']);
    const trailingEmpty = joinWords(['apple', '']);
    expect(twoEmpty).toBe(',');
    expect(trailingEmpty).toBe('apple,');
  });
});

// Round-trip tests: split followed by join (and the reverse) must not lose
// information for the covered inputs.
describe('lossless conversion between splitWords and joinWords', () => {
  test('splitting and joining simple string preserves original', () => {
    const original = 'apple,banana,cherry';
    expect(joinWords(splitWords(original))).toBe(original);
  });

  test('splitting and joining string with quoted phrases preserves semantics', () => {
    const fields = splitWords('"apple, banana",cherry');
    const rejoined = joinWords(fields);
    expect(fields).toEqual(['apple, banana', 'cherry']);
    expect(splitWords(rejoined)).toEqual(fields);
  });

  test('splitting and joining handles empty fields correctly', () => {
    const original = 'apple,banana,';
    const fields = splitWords(original);
    const rejoined = joinWords(fields);
    expect(rejoined).toBe(original);
    expect(splitWords(rejoined)).toEqual(fields);
  });

  test('joining and splitting preserves original array', () => {
    const original = ['apple', 'banana, with comma', 'cherry'];
    const roundTripped = splitWords(joinWords(original));
    expect(roundTripped).toEqual(original);
  });
});