Merge pull request #2548 from zytact/picard-3000

zas · web-flow · commit 4478043db1f8 · 2024-11-05T20:49:06.000+01:00
PICARD-3000: Children's Music is shown as "Children'S Music" in Picard
diff --git a/picard/script/functions.py b/picard/script/functions.py
@@ -41,7 +41,6 @@
 from functools import reduce
 import operator
 import re
-import unicodedata
 
 from picard.const.countries import RELEASE_COUNTRIES
 from picard.extension_points.script_functions import script_function
@@ -57,6 +56,7 @@
 )
 from picard.util import (
     pattern_as_regex,
+    titlecase,
     uniqify,
 )
 
@@ -962,33 +962,7 @@ def func_ne_any(parser, x, *args):
 _Since Picard 2.1_"""
 ))
 def func_title(parser, text):
-    # GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
-    # from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
-    if not text:
-        return text
-    capitalized = text[0].capitalize()
-    capital = False
-    for i in range(1, len(text)):
-        t = text[i]
-        if t in "’'" and text[i-1].isalpha():
-            capital = False
-        elif iswbound(t):
-            capital = True
-        elif capital and t.isalpha():
-            capital = False
-            t = t.capitalize()
-        else:
-            capital = False
-        capitalized += t
-    return capitalized
-
-
-def iswbound(char):
-    # GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
-    # from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
-    """ Checks whether the given character is a word boundary """
-    category = unicodedata.category(char)
-    return 'Zs' == category or 'Sk' == category or 'P' == category[0]
+    return titlecase(text)
 
 
 @script_function(documentation=N_(
diff --git a/picard/track.py b/picard/track.py
@@ -75,7 +75,10 @@
     ScriptParser,
     iter_active_tagging_scripts,
 )
-from picard.util import pattern_as_regex
+from picard.util import (
+    pattern_as_regex,
+    titlecase,
+)
 from picard.util.imagelist import ImageList
 from picard.util.textencoding import asciipunct
 
@@ -335,7 +338,7 @@ def _genres_to_metadata(genres, limit=None, minusage=0, filters='', join_with=No
 
         # Find most common genres
         most_common_genres = genres.most_common(limit)
-        genres_list = [name.title() for name, _count in most_common_genres]
+        genres_list = [titlecase(name) for name, _count in most_common_genres]
         genres_list.sort()
 
         # And generate the genre metadata tag
diff --git a/picard/util/__init__.py b/picard/util/__init__.py
@@ -1183,3 +1183,51 @@ def detect_file_encoding(path, max_bytes_to_read=1024*256):
             encoding = result['encoding'].lower()
 
         return encoding
+
+
+def iswbound(char):
+    # GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
+    # from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
+    """ Checks whether the given character is a word boundary """
+    category = unicodedata.category(char)
+    return 'Zs' == category or 'Sk' == category or 'P' == category[0]
+
+
+def titlecase(text):
+    # GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
+    # from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
+    """Converts text to title case following word boundary rules.
+
+    Capitalizes the first character of the text and every letter that directly
+    follows a word boundary character (see iswbound). An apostrophe that comes
+    after a letter does not start a new word. Other characters are unchanged.
+
+    Args:
+        text (str): The input text to convert to title case.
+
+    Returns:
+        str: The text converted to title case. Empty input is returned unchanged.
+
+    Examples:
+        >>> titlecase("hello world")
+        'Hello World'
+        >>> titlecase("children's music")
+        "Children's Music"
+    """
+    if not text:
+        return text
+    capitalized = text[0].capitalize()
+    capital = False
+    for i in range(1, len(text)):
+        t = text[i]
+        if t in "’'" and text[i-1].isalpha():
+            capital = False
+        elif iswbound(t):
+            capital = True
+        elif capital and t.isalpha():
+            capital = False
+            t = t.capitalize()
+        else:
+            capital = False
+        capitalized += t
+    return capitalized
diff --git a/test/test_utils.py b/test/test_utils.py
@@ -74,6 +74,7 @@
     pattern_as_regex,
     sort_by_similarity,
     system_supports_long_paths,
+    titlecase,
     tracknum_and_title_from_filename,
     tracknum_from_filename,
     uniqify,
@@ -1019,3 +1020,51 @@ def test_detect_file_encoding_eac_windows_1251(self):
         expected_encoding = 'windows-1251'
         file_path = get_test_data_path('eac-windows1251.log')
         self.assertEqual(expected_encoding, detect_file_encoding(file_path))
+
+
+class TitlecaseTest(PicardTestCase):
+
+    def test_titlecase(self):
+        tests = (
+            # empty string
+            ('', ''),
+            # simple cases
+            ('hello world', 'Hello World'),
+            ('Hello World', 'Hello World'),
+            ('HELLO WORLD', 'HELLO WORLD'),
+            # contractions and possessives
+            ("children's music", "Children's Music"),
+            ("CHILDREN'S MUSIC", "CHILDREN'S MUSIC"),
+            ("don't stop", "Don't Stop"),
+            # hyphenated words
+            ('first-class ticket', 'First-Class Ticket'),
+            ('FIRST-CLASS ticket', 'FIRST-CLASS Ticket'),
+            # multiple spaces
+            ('hello   world', 'Hello   World'),
+            # punctuation
+            ('hello, world!', 'Hello, World!'),
+            ('hello... world', 'Hello... World'),
+            # special characters
+            ('über café', 'Über Café'),
+            ('españa', 'España'),
+            ('ñandu', 'Ñandu'),
+            # single character words
+            ('a b c', 'A B C'),
+            # numbers
+            ('2001 a space odyssey', '2001 A Space Odyssey'),
+            # preserves existing capitalization after first letter
+            ('MacDonald had a farm', 'MacDonald Had A Farm'),
+            ('LaTeX document', 'LaTeX Document'),
+            # mixed case
+            ('mIxEd CaSe', 'MIxEd CaSe'),
+            # unicode boundaries
+            ('hello—world', 'Hello—World'),
+            ('hello\u2014world', 'Hello\u2014World'),
+            # preserves all caps
+            ('IBM PC', 'IBM PC'),
+            # single letter
+            ('a', 'A'),
+            ('A', 'A'),
+        )
+        for input, expected in tests:
+            self.assertEqual(expected, titlecase(input))