Skip to content

Commit 4478043

Browse files
authored
Merge pull request #2548 from zytact/picard-3000
PICARD-3000: Children's Music is shown as "Children'S Music" in Picard
2 parents 5dab69b + fe9302a commit 4478043

File tree

4 files changed

+104
-30
lines changed

4 files changed

+104
-30
lines changed

picard/script/functions.py

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
from functools import reduce
4242
import operator
4343
import re
44-
import unicodedata
4544

4645
from picard.const.countries import RELEASE_COUNTRIES
4746
from picard.extension_points.script_functions import script_function
@@ -57,6 +56,7 @@
5756
)
5857
from picard.util import (
5958
pattern_as_regex,
59+
titlecase,
6060
uniqify,
6161
)
6262

@@ -962,33 +962,7 @@ def func_ne_any(parser, x, *args):
962962
_Since Picard 2.1_"""
963963
))
964964
def func_title(parser, text):
965-
# GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
966-
# from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
967-
if not text:
968-
return text
969-
capitalized = text[0].capitalize()
970-
capital = False
971-
for i in range(1, len(text)):
972-
t = text[i]
973-
if t in "’'" and text[i-1].isalpha():
974-
capital = False
975-
elif iswbound(t):
976-
capital = True
977-
elif capital and t.isalpha():
978-
capital = False
979-
t = t.capitalize()
980-
else:
981-
capital = False
982-
capitalized += t
983-
return capitalized
984-
985-
986-
def iswbound(char):
987-
# GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
988-
# from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
989-
""" Checks whether the given character is a word boundary """
990-
category = unicodedata.category(char)
991-
return 'Zs' == category or 'Sk' == category or 'P' == category[0]
965+
return titlecase(text)
992966

993967

994968
@script_function(documentation=N_(

picard/track.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,10 @@
7575
ScriptParser,
7676
iter_active_tagging_scripts,
7777
)
78-
from picard.util import pattern_as_regex
78+
from picard.util import (
79+
pattern_as_regex,
80+
titlecase,
81+
)
7982
from picard.util.imagelist import ImageList
8083
from picard.util.textencoding import asciipunct
8184

@@ -335,7 +338,7 @@ def _genres_to_metadata(genres, limit=None, minusage=0, filters='', join_with=No
335338

336339
# Find most common genres
337340
most_common_genres = genres.most_common(limit)
338-
genres_list = [name.title() for name, _count in most_common_genres]
341+
genres_list = [titlecase(name) for name, _count in most_common_genres]
339342
genres_list.sort()
340343

341344
# And generate the genre metadata tag

picard/util/__init__.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,3 +1183,51 @@ def detect_file_encoding(path, max_bytes_to_read=1024*256):
11831183
encoding = result['encoding'].lower()
11841184

11851185
return encoding
1186+
1187+
1188+
def iswbound(char):
    # GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
    # from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
    """Check whether the given character is a word boundary.

    A character counts as a boundary when its Unicode general category is
    ``Zs`` (space separator), ``Sk`` (modifier symbol) or any punctuation
    category (``P*``).

    Args:
        char (str): A single character.

    Returns:
        bool: True if ``char`` is a word boundary, False otherwise.
    """
    category = unicodedata.category(char)
    return category in ('Zs', 'Sk') or category.startswith('P')


def titlecase(text):
    # GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
    # from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
    """Convert text to title case following word boundary rules.

    Capitalizes the first character of the text and the first letter that
    directly follows a word boundary (see ``iswbound``). All other characters
    are left untouched, so existing capitalization inside a word is preserved.
    An apostrophe (``'`` or ``’``) directly after a letter does not start a
    new word, which keeps contractions and possessives intact.

    Note:
        Only the very first character is capitalized unconditionally. If the
        text starts with a boundary character (for example an opening quote),
        the letter following it is left as is.

    Args:
        text (str): The input text to convert to title case.

    Returns:
        str: The text converted to title case. Falsy input (such as the empty
        string) is returned unchanged.

    Examples:
        >>> titlecase("hello world")
        'Hello World'
        >>> titlecase("children's music")
        "Children's Music"
    """
    if not text:
        return text
    # Collect the pieces in a list and join once instead of growing a string
    # with ``+=`` on every character.
    parts = [text[0].capitalize()]
    capital = False
    for previous, char in zip(text, text[1:]):
        piece = char
        if char in "’'" and previous.isalpha():
            # Apostrophe inside a word: the next letter must stay lowercase.
            capital = False
        elif iswbound(char):
            capital = True
        else:
            if capital and char.isalpha():
                piece = char.capitalize()
            capital = False
        parts.append(piece)
    return ''.join(parts)

test/test_utils.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
pattern_as_regex,
7575
sort_by_similarity,
7676
system_supports_long_paths,
77+
titlecase,
7778
tracknum_and_title_from_filename,
7879
tracknum_from_filename,
7980
uniqify,
@@ -1019,3 +1020,51 @@ def test_detect_file_encoding_eac_windows_1251(self):
10191020
expected_encoding = 'windows-1251'
10201021
file_path = get_test_data_path('eac-windows1251.log')
10211022
self.assertEqual(expected_encoding, detect_file_encoding(file_path))
1023+
1024+
1025+
class TitlecaseTest(PicardTestCase):
    """Table-driven checks for ``titlecase``."""

    def test_titlecase(self):
        """Each ``(text, expected)`` pair must satisfy ``titlecase(text) == expected``."""
        cases = (
            # empty string
            ('', ''),
            # simple cases
            ('hello world', 'Hello World'),
            ('Hello World', 'Hello World'),
            ('HELLO WORLD', 'HELLO WORLD'),
            # contractions and possessives
            ("children's music", "Children's Music"),
            ("CHILDREN'S MUSIC", "CHILDREN'S MUSIC"),
            ("don't stop", "Don't Stop"),
            # hyphenated words
            ('first-class ticket', 'First-Class Ticket'),
            ('FIRST-CLASS ticket', 'FIRST-CLASS Ticket'),
            # multiple spaces (two consecutive spaces must both be kept)
            ('hello  world', 'Hello  World'),
            # punctuation
            ('hello, world!', 'Hello, World!'),
            ('hello... world', 'Hello... World'),
            # special characters
            ('über café', 'Über Café'),
            ('españa', 'España'),
            ('ñandu', 'Ñandu'),
            # single character words
            ('a b c', 'A B C'),
            # numbers
            ('2001 a space odyssey', '2001 A Space Odyssey'),
            # preserves existing capitalization after first letter
            ('MacDonald had a farm', 'MacDonald Had A Farm'),
            ('LaTeX document', 'LaTeX Document'),
            # mixed case
            ('mIxEd CaSe', 'MIxEd CaSe'),
            # unicode boundaries (em dash, written literally and as an escape)
            ('hello—world', 'Hello—World'),
            ('hello\u2014world', 'Hello\u2014World'),
            # preserves all caps
            ('IBM PC', 'IBM PC'),
            # single letter
            ('a', 'A'),
            ('A', 'A'),
        )
        for text, expected in cases:
            # subTest reports the failing input and lets the remaining cases run.
            with self.subTest(text=text):
                self.assertEqual(expected, titlecase(text))

0 commit comments

Comments
 (0)