From 8aba6fca572dc61eea7e9cff2a3bcd49cbdd4e9f Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Sun, 27 Apr 2025 11:13:56 +0530 Subject: [PATCH 1/6] feat: allow splitting words --- src/index.ts | 3 ++- src/string.ts | 52 +++++++++++++++++++++++++++++++++++++++++++++ test/string.test.ts | 47 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index ecd4078..4dfb0f8 100644 --- a/src/index.ts +++ b/src/index.ts @@ -13,7 +13,7 @@ import { import { getRecipients } from './email'; -import { parseBoolean } from './string'; +import { parseBoolean, splitWords } from './string'; import { sortAsc, quantile, @@ -50,6 +50,7 @@ export { normalizeToPercentage, getUndefinedVariablesInMessage, parseBoolean, + splitWords, quantile, replaceVariablesInMessage, sortAsc, diff --git a/src/string.ts b/src/string.ts index 1971547..596fc77 100644 --- a/src/string.ts +++ b/src/string.ts @@ -16,3 +16,55 @@ export function parseBoolean(candidate: string | number) { return false; } } + +/** + * Splits a string into an array of words, respecting quoted phrases. + * Handles comma-separated values where phrases in double quotes are treated as a single unit, + * even if they contain commas. + * + * Example: '"apple, banana", cherry' => ['apple, banana', 'cherry'] + * + * @param {string} candidate - The input string to split + * @return {string[]} - Array of split words/phrases + */ +export function splitWords(candidate: string): string[] { + // Handle empty input case + if (!candidate) return ['']; + + const result = []; + let currentWord = ''; + let inQuotes = false; // Tracks whether we're currently inside a quoted section + + // Process the string character by character + for (let i = 0; i < candidate.length; i++) { + const char = candidate[i]; + + // Toggle our "inside quotes" state when we encounter a quote character + if (char === '"') { + inQuotes = !inQuotes; + continue; // Skip adding the quote character to the result + } + + // Only treat commas as delimiters when not inside quotes + if (char === ',' && !inQuotes) { + result.push(currentWord.trim()); // Add the completed word to results + currentWord = ''; // Reset for the next word + continue; + } + + // For all other characters, add to the current word + currentWord += char; + } + + // Check for mismatched quotes + if (inQuotes) { + throw new Error('Mismatched quotes in input string'); + } + + // Don't forget to add the last word if there is one + if (currentWord.trim()) { + result.push(currentWord.trim()); + } + + return result; +} diff --git a/test/string.test.ts b/test/string.test.ts index 9a85eb2..73c1dbf 100644 --- a/test/string.test.ts +++ b/test/string.test.ts @@ -1,4 +1,4 @@ -import { parseBoolean } from '../src'; +import { parseBoolean, splitWords } from '../src'; describe('#parseBoolean', () => { test('returns true for input "true"', () => { @@ -37,3 +37,48 @@ describe('#parseBoolean', () => { expect(parseBoolean(undefined)).toBe(false); }); }); + +describe('#splitWords', () => { + test('returns an array of words for input "apple,banana,cherry"', () => { + expect(splitWords('apple,banana,cherry')).toEqual([ + 'apple', + 'banana', + 'cherry', + ]); + }); + + test('returns an empty array for input ""', () => { + expect(splitWords('')).toEqual(['']); + }); + + test('returns an array with a single word for input "apple"', () => { + expect(splitWords('apple')).toEqual(['apple']); + }); + + test('allows phrases without double quotes', () => { + expect(splitWords('apple banana, cherry')).toEqual([ + 'apple banana', + 'cherry', + ]); + }); + + test('allows phrases with double quotes', () => { + expect(splitWords('"apple banana", cherry')).toEqual([ + 'apple banana', + 'cherry', + ]); + }); + + test('allows phrases with double quotes and commas', () => { + expect(splitWords('"apple, banana", cherry')).toEqual([ + 'apple, banana', + 'cherry', + ]); + }); + + test('throws error for mismatched quotes', () => { + expect(() => splitWords('"apple, banana, cherry')).toThrow( + 'Mismatched quotes in input string' + ); + }); +}); From 7847ef15c88509dd6b08f2db44743451db228745 Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Sun, 27 Apr 2025 11:14:30 +0530 Subject: [PATCH 2/6] chore: add comments --- src/string.ts | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/string.ts b/src/string.ts index 596fc77..33422c6 100644 --- a/src/string.ts +++ b/src/string.ts @@ -21,41 +21,44 @@ export function parseBoolean(candidate: string | number) { * Splits a string into an array of words, respecting quoted phrases. * Handles comma-separated values where phrases in double quotes are treated as a single unit, * even if they contain commas. - * + * + * This is basically CSV parsing, if we find this to be cumbersome to maintain, + * we can consider using a dedicated CSV parsing library. + * * Example: '"apple, banana", cherry' => ['apple, banana', 'cherry'] - * + * * @param {string} candidate - The input string to split * @return {string[]} - Array of split words/phrases */ export function splitWords(candidate: string): string[] { // Handle empty input case if (!candidate) return ['']; - + const result = []; let currentWord = ''; - let inQuotes = false; // Tracks whether we're currently inside a quoted section - + let inQuotes = false; // Tracks whether we're currently inside a quoted section + // Process the string character by character for (let i = 0; i < candidate.length; i++) { const char = candidate[i]; - + // Toggle our "inside quotes" state when we encounter a quote character if (char === '"') { inQuotes = !inQuotes; - continue; // Skip adding the quote character to the result + continue; // Skip adding the quote character to the result } - + // Only treat commas as delimiters when not inside quotes if (char === ',' && !inQuotes) { - result.push(currentWord.trim()); // Add the completed word to results - currentWord = ''; // Reset for the next word + result.push(currentWord.trim()); // Add the completed word to results + currentWord = ''; // Reset for the next word continue; } - + // For all other characters, add to the current word currentWord += char; } - + // Check for mismatched quotes if (inQuotes) { throw new Error('Mismatched quotes in input string'); @@ -65,6 +68,6 @@ export function splitWords(candidate: string): string[] { if (currentWord.trim()) { result.push(currentWord.trim()); } - + return result; } From de829a0a3b10206839623ef4a6c4d62ba7a7c199 Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Sun, 27 Apr 2025 11:18:31 +0530 Subject: [PATCH 3/6] chore: update comments --- src/string.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/string.ts b/src/string.ts index 33422c6..d615c5f 100644 --- a/src/string.ts +++ b/src/string.ts @@ -38,24 +38,28 @@ export function splitWords(candidate: string): string[] { let currentWord = ''; let inQuotes = false; // Tracks whether we're currently inside a quoted section - // Process the string character by character + // Process the string character by character: + // 1. Toggle quote state when we encounter double quotes (") - marking quoted sections + // Quotes themselves are not included in the final words + // 2. When we hit a comma outside quotes, it's treated as a word separator: + // - Current word is trimmed and added to results array + // - Current word buffer is reset for the next word + // 3. Inside quoted sections, commas are treated as regular characters (part of the word) + // 4. All other characters are added to the current word being built for (let i = 0; i < candidate.length; i++) { const char = candidate[i]; - // Toggle our "inside quotes" state when we encounter a quote character if (char === '"') { inQuotes = !inQuotes; continue; // Skip adding the quote character to the result } - // Only treat commas as delimiters when not inside quotes if (char === ',' && !inQuotes) { result.push(currentWord.trim()); // Add the completed word to results currentWord = ''; // Reset for the next word continue; } - // For all other characters, add to the current word currentWord += char; } From 7c1627cd98fc4852d4d255f7d7687ac1c567119b Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Sun, 27 Apr 2025 11:26:05 +0530 Subject: [PATCH 4/6] feat: handle delimiter --- src/string.ts | 20 +++++++++++++++++++- test/string.test.ts | 6 ++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/string.ts b/src/string.ts index d615c5f..294571d 100644 --- a/src/string.ts +++ b/src/string.ts @@ -68,9 +68,27 @@ export function splitWords(candidate: string): string[] { throw new Error('Mismatched quotes in input string'); } - // Don't forget to add the last word if there is one + // Add the last word to the result array + // + // There are two cases we need to handle here: + // + // 1. The normal case: we need to add the last word if there is one. + // This is because our loop only adds words when it encounters a delimiter, + // but the last word won't have a delimiter after it. + // Example: "a,b,c" -> After processing "a," and "b,", we still need to add "c" + // + // 2. Trailing delimiter case: In CSV parsing, trailing delimiters are significant + // and indicate an empty field at the end. + // Example: "a,b," represents ["a", "b", ""] + // + // The normal flow only adds non-empty words after trimming, which means + // if the input ends with a delimiter, the trailing empty field would be lost. + // We need to handle this case separately to preserve the empty trailing field. if (currentWord.trim()) { result.push(currentWord.trim()); + } else if (candidate.endsWith(',')) { + // If input ends with a delimiter, add an empty field + result.push(''); } return result; diff --git a/test/string.test.ts b/test/string.test.ts index 73c1dbf..43dcb1e 100644 --- a/test/string.test.ts +++ b/test/string.test.ts @@ -81,4 +81,10 @@ describe('#splitWords', () => { 'Mismatched quotes in input string' ); }); + + test('preserves trailing empty field when input ends with a delimiter', () => { + expect(splitWords('apple,banana,')).toEqual(['apple', 'banana', '']); + expect(splitWords('apple,')).toEqual(['apple', '']); + expect(splitWords(',')).toEqual(['', '']); + }); }); From c365bf54fa2c847290618613eb983353d38655bb Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Tue, 29 Apr 2025 09:44:20 +0530 Subject: [PATCH 5/6] feat: add join word method --- src/index.ts | 3 ++- src/string.ts | 23 ++++++++++++++++ test/string.test.ts | 64 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 88 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index 4dfb0f8..d2fe690 100644 --- a/src/index.ts +++ b/src/index.ts @@ -13,7 +13,7 @@ import { import { getRecipients } from './email'; -import { parseBoolean, splitWords } from './string'; +import { parseBoolean, splitWords, joinWords } from './string'; import { sortAsc, quantile, @@ -49,6 +49,7 @@ export { applyRotationTransform, normalizeToPercentage, getUndefinedVariablesInMessage, + joinWords, parseBoolean, splitWords, quantile, diff --git a/src/string.ts b/src/string.ts index 294571d..e61f3bd 100644 --- a/src/string.ts +++ b/src/string.ts @@ -93,3 +93,26 @@ export function splitWords(candidate: string): string[] { return result; } + +/** + * Joins an array of words into a string, properly quoting phrases that contain commas + * to ensure they can be correctly split later using splitWords. + * + * Example: ['apple, banana', 'cherry'] => '"apple, banana",cherry' + * + * @param {string[]} words - Array of words/phrases to join + * @return {string} - The joined string + */ +export function joinWords(words: string[]): string { + if (!words || words.length === 0) return ''; + + return words + .map(word => { + // If the word contains a comma, wrap it in double quotes + if (word.includes(',')) { + return `"${word}"`; + } + return word; + }) + .join(','); +} diff --git a/test/string.test.ts b/test/string.test.ts index 43dcb1e..e5912b4 100644 --- a/test/string.test.ts +++ b/test/string.test.ts @@ -1,4 +1,4 @@ -import { parseBoolean, splitWords } from '../src'; +import { parseBoolean, splitWords, joinWords } from '../src'; describe('#parseBoolean', () => { test('returns true for input "true"', () => { @@ -88,3 +88,65 @@ describe('#splitWords', () => { expect(splitWords(',')).toEqual(['', '']); }); }); + +describe('#joinWords', () => { + test('joins array of words with commas', () => { + expect(joinWords(['apple', 'banana', 'cherry'])).toBe( + 'apple,banana,cherry' + ); + }); + + test('adds quotes around words containing commas', () => { + expect(joinWords(['apple, banana', 'cherry'])).toBe( + '"apple, banana",cherry' + ); + }); + + test('handles empty array', () => { + expect(joinWords([])).toBe(''); + }); + + test('handles undefined or null input', () => { + // @ts-ignore + expect(joinWords(undefined)).toBe(''); + // @ts-ignore + expect(joinWords(null)).toBe(''); + }); + + test('handles array with empty strings', () => { + expect(joinWords(['', ''])).toBe(','); + expect(joinWords(['apple', ''])).toBe('apple,'); + }); +}); + +describe('lossless conversion between splitWords and joinWords', () => { + test('splitting and joining simple string preserves original', () => { + const original = 'apple,banana,cherry'; + const split = splitWords(original); + const joined = joinWords(split); + expect(joined).toBe(original); + }); + + test('splitting and joining string with quoted phrases preserves semantics', () => { + const original = '"apple, banana",cherry'; + const split = splitWords(original); + const joined = joinWords(split); + expect(split).toEqual(['apple, banana', 'cherry']); + expect(splitWords(joined)).toEqual(split); + }); + + test('splitting and joining handles empty fields correctly', () => { + const original = 'apple,banana,'; + const split = splitWords(original); + const joined = joinWords(split); + expect(joined).toBe(original); + expect(splitWords(joined)).toEqual(split); + }); + + test('joining and splitting preserves original array', () => { + const original = ['apple', 'banana, with comma', 'cherry']; + const joined = joinWords(original); + const split = splitWords(joined); + expect(split).toEqual(original); + }); +}); From c6e60e5da526a78a66c8687c89cfa84c961c0379 Mon Sep 17 00:00:00 2001 From: Shivam Mishra Date: Thu, 15 May 2025 15:20:00 +0530 Subject: [PATCH 6/6] feat: return empty array --- src/string.ts | 2 +- test/string.test.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/string.ts b/src/string.ts index e61f3bd..b7c2194 100644 --- a/src/string.ts +++ b/src/string.ts @@ -32,7 +32,7 @@ export function parseBoolean(candidate: string | number) { */ export function splitWords(candidate: string): string[] { // Handle empty input case - if (!candidate) return ['']; + if (!candidate) return []; const result = []; let currentWord = ''; diff --git a/test/string.test.ts b/test/string.test.ts index e5912b4..00b5a90 100644 --- a/test/string.test.ts +++ b/test/string.test.ts @@ -48,7 +48,7 @@ describe('#splitWords', () => { }); test('returns an empty array for input ""', () => { - expect(splitWords('')).toEqual(['']); + expect(splitWords('')).toEqual([]); }); test('returns an array with a single word for input "apple"', () => {