diff --git a/picard/ui/itemviews/basetreeview.py b/picard/ui/itemviews/basetreeview.py index 8f0e1ff6b5..5da0a378a0 100644 --- a/picard/ui/itemviews/basetreeview.py +++ b/picard/ui/itemviews/basetreeview.py @@ -48,6 +48,7 @@ heappop, heappush, ) +from typing import TYPE_CHECKING, cast from PyQt6 import ( QtCore, @@ -84,10 +85,14 @@ from picard.util import ( icontheme, iter_files_from_objects, - normpath, + qurl_to_local_path, restore_method, ) + +if TYPE_CHECKING: # Avoid runtime circular import + from picard.tagger import Tagger + from picard.ui.collectionmenu import CollectionMenu from picard.ui.enums import MainAction from picard.ui.filter import Filter @@ -475,16 +480,18 @@ def scrollTo(self, index, scrolltype=QtWidgets.QAbstractItemView.ScrollHint.Ensu def drop_urls(urls, target, move_to_multi_tracks=True): files = [] new_paths = [] - tagger = QtCore.QCoreApplication.instance() + # We need to cast to Tagger here to avoid type checking errors below. + tagger = cast("Tagger", QtCore.QCoreApplication.instance()) for url in urls: log.debug("Dropped the URL: %r", url.toString(QtCore.QUrl.UrlFormattingOption.RemoveUserInfo)) - if url.scheme() == 'file' or not url.scheme(): - filename = normpath(url.toLocalFile().rstrip('\0')) + filename = qurl_to_local_path(url) + if filename: file = tagger.files.get(filename) if file: files.append(file) else: - new_paths.append(filename) + if filename: + new_paths.append(filename) elif url.scheme() in {'http', 'https'}: file_lookup = tagger.get_file_lookup() file_lookup.mbid_lookup(url.path(), browser_fallback=False) diff --git a/picard/ui/mainwindow/__init__.py b/picard/ui/mainwindow/__init__.py index 911a921c3c..a59531896a 100644 --- a/picard/ui/mainwindow/__init__.py +++ b/picard/ui/mainwindow/__init__.py @@ -89,6 +89,7 @@ from picard.track import Track from picard.util import ( IgnoreUpdatesContext, + canonicalize_path, icontheme, iter_files_from_objects, iter_unique, @@ -927,6 +928,8 @@ def add_files(self): filter=";;".join(formats), ) if files: + # Canonicalize paths before use + files = tuple(canonicalize_path(p) for p in files if p) config = get_config() config.persist['current_directory'] = os.path.dirname(files[0]) self.tagger.add_files(files) @@ -943,12 +946,14 @@ def add_directory(self): dir=current_directory, ) if directory: - dir_list.append(directory) + dir_list.append(canonicalize_path(directory)) else: dir_list = FileDialog.getMultipleDirectories( parent=self, directory=current_directory, ) + if dir_list: + dir_list = tuple(canonicalize_path(d) for d in dir_list if d) dir_count = len(dir_list) if dir_count: diff --git a/picard/util/__init__.py b/picard/util/__init__.py index 739ee1647b..93edc8d283 100644 --- a/picard/util/__init__.py +++ b/picard/util/__init__.py @@ -61,12 +61,13 @@ import ntpath from operator import attrgetter import os -from pathlib import PurePath +from pathlib import Path, PurePath import re import subprocess # nosec: B404 import sys from time import monotonic import unicodedata +from urllib.parse import unquote from dateutil.parser import parse @@ -254,6 +255,213 @@ def normpath(path, realpath=True): return path +def _resolve_path_components_macos(path: str) -> str: + """Best-effort macOS path component resolution. + + Resolve path components against the filesystem on macOS to account for + Unicode normalization differences (NFC vs. NFD). Returns a best‑effort + on-disk representation, resolving as many components as possible. + + Parameters + ---------- + path : str + The input path to resolve. Can be absolute or relative. The final + file/directory does not need to exist. + + Returns + ------- + str + A path with components matched to existing directory entries where + possible. If resolution fails for any component, the original + component is kept. Absolute vs. relative form is preserved. + + Notes + ----- + - This function does not change the drive/volume. + - It tolerates missing final components (resolves what exists). + - Matching is performed using exact, NFC, and NFD comparisons. + """ + + def _listdir_safe(directory: Path) -> tuple[str, ...]: + try: + return tuple(entry.name for entry in directory.iterdir()) + except OSError: + return () + + def _match_component(candidate: str, entries: tuple[str, ...]) -> str: + if not entries: + return candidate + if candidate in entries: + return candidate + cand_nfc = unicodedata.normalize('NFC', candidate) + cand_nfd = unicodedata.normalize('NFD', candidate) + for entry in entries: + if ( + entry == cand_nfc + or entry == cand_nfd + or unicodedata.normalize('NFC', entry) == cand_nfc + or unicodedata.normalize('NFD', entry) == cand_nfd + ): + return entry + return candidate + + def _join_resolved(parts: list[str], absolute: bool) -> str: + if not parts: + return os.sep if absolute else '' + if absolute: + return str(Path('/').joinpath(*parts)) + return str(Path().joinpath(*parts)) + + def resolved_parts(is_abs): + current_dir = Path('/') if is_abs else Path.cwd() + + for part in PurePath(path).parts: + if part in ('', '.'): + continue + entries = _listdir_safe(current_dir) + chosen = _match_component(part, entries) + yield chosen + current_dir = current_dir / chosen + + try: + if not path: + return path + is_abs = Path(path).is_absolute() + return _join_resolved(list(resolved_parts(is_abs)), is_abs) + except Exception as err: # noqa: BLE001 + log.debug("canonicalize components failed for %r: %s", path, err) + return str(Path(path)) + + +def canonicalize_path(path: str) -> str: + """Canonicalize a path for robust I/O. + + Produces a sanitized path suitable for file operations by trimming + trailing NULs, normalizing separators, and resolving platform-specific + quirks (e.g., Unicode normalization on macOS). + + Parameters + ---------- + path : str + The input path as a Unicode string. + + Returns + ------- + str + A canonicalized path. On macOS, components are matched against the + filesystem using normalization-insensitive rules. On all platforms, + the result is normalized and, where possible, resolved via + ``os.path.realpath``. + + Notes + ----- + - Trailing ``"\0"`` characters are stripped. + - If resolution via ``realpath`` fails, the normalized input is returned. + - Non-string inputs are returned unchanged. + """ + if not isinstance(path, str): + return path # type: ignore[return-value] + + # Trim any accidental NUL terminators and normalize separators + trimmed = path.rstrip('\0') + if not trimmed: + return '' + + # Avoid realpath here; resolve components manually on macOS first + normalized = str(Path(trimmed)) + + # Windows: Qt QUrl without scheme can yield paths like "/C:/foo". + # Strip the leading slash so we keep the correct drive when absolutizing. + if IS_WIN and re.match(r'^/[A-Za-z]:', normalized): + normalized = normalized[1:] + + if IS_MACOS: + resolved = _resolve_path_components_macos(normalized) + # realpath only if possible, otherwise keep best-effort resolved path + try: + candidate = str(Path(resolved).resolve(strict=False)) + except OSError: + candidate = resolved + else: + # On non-macOS platforms avoid resolving symlinks/junctions here, as this can + # unexpectedly change drive letters on Windows runners (e.g. C: → D:). + # Prefer an absolute, normalized path without crossing filesystem links. + candidate = os.path.abspath(os.path.normpath(normalized)) + + # Windows-specific path handling + if IS_WIN: + # Normalize drive letter to uppercase for stable comparisons + if len(candidate) >= 2 and candidate[1] == ':': + candidate = candidate[0].upper() + candidate[1:] + # Carry over Windows long-path handling from normpath + if not system_supports_long_paths(): + candidate = win_prefix_longpath(candidate) + + return candidate + + +def is_local_file_url(url): + """Check if a QUrl should be treated as a local file. + + Parameters + ---------- + url : QtCore.QUrl + The URL to check. + + Returns + ------- + bool + True if the URL should be treated as a local file, False otherwise. + + Notes + ----- + Accept local files via explicit file:// scheme or scheme-less URLs. + Qt on Windows can misparse "C:\\path" as scheme "c" with an empty path. + Detect a leading Windows drive (single letter + ":") in the original (sans user info) + string and treat it as a local path rather than a custom scheme. + """ + import re + + return ( + url.scheme() == 'file' + or not url.scheme() + or ( + IS_WIN + and len(url.scheme()) == 1 + and re.match(r'^[a-zA-Z]:', url.toString(QtCore.QUrl.UrlFormattingOption.RemoveUserInfo)) + ) + ) + + +def qurl_to_local_path(url: QtCore.QUrl) -> str | None: + """Convert a QUrl to a normalized local filesystem path. + + Parameters + ---------- + url : QtCore.QUrl + The URL to convert to a local path. + + Returns + ------- + str + A canonicalized local filesystem path, or empty string if conversion fails. + + Notes + ----- + Prefer a local file if provided; fall back to treating the URL path as a local filesystem path. + For some Windows inputs (e.g. raw "C:\\path"), QUrl.path() can drop the drive ("/Users/..."), + which would cause os.path.abspath to adopt the current drive. Prefer the original string + (without user info) before url.path() to preserve the drive letter. + Unquote percent-encoding (e.g. "\\\\" → "%5C") to recover a plain local path. + """ + if not is_local_file_url(url): + return '' + + raw_str = url.toString(QtCore.QUrl.UrlFormattingOption.RemoveUserInfo) + local: str = url.toLocalFile() or unquote(raw_str) or url.path() + return canonicalize_path(local) if local else '' + + def win_prefix_longpath(path): """ For paths longer then WIN_MAX_FILEPATH_LEN enable long path support by prefixing with WIN_LONGPATH_PREFIX. diff --git a/test/test_canonicalize_path.py b/test/test_canonicalize_path.py new file mode 100644 index 0000000000..ef37271964 --- /dev/null +++ b/test/test_canonicalize_path.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# +# Picard, the next-generation MusicBrainz tagger +# +# Copyright (C) 2025 The MusicBrainz Team +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +""" +Tests for picard.util.canonicalize_path +""" + +from __future__ import annotations + +import os +from pathlib import Path +import unicodedata + +from picard.const.sys import IS_MACOS +from picard.util import ( + WIN_LONGPATH_PREFIX, + WIN_MAX_FILEPATH_LEN, + canonicalize_path, + win_prefix_longpath, +) + +import pytest + + +@pytest.mark.parametrize( + ("raw", "expected"), + [ + ("/tmp/foo\0\0", "/tmp/foo"), + ("./a/../b\0", str(Path("b"))), + ], +) +def test_canonicalize_path_trims_nulls_and_normalizes(raw: str, expected: str) -> None: + out: str = canonicalize_path(raw) + # Only compare tail to avoid host-specific absolute prefixes + assert out.endswith(os.path.normpath(expected)) + + +def test_canonicalize_path_returns_non_string_unchanged() -> None: + # Provide arbitrary bytes; function should pass through unchanged + path_b: bytes = b"/invalid/bytes/\xff\xfe.bin" + out = canonicalize_path(path_b) # type: ignore[arg-type] + assert isinstance(out, (bytes, bytearray)) + assert out == path_b + + +def test_canonicalize_path_resolves_existing_paths(tmp_path: os.PathLike[str]) -> None: + nested: Path = Path(tmp_path) / "x" / "y" + nested.mkdir(parents=True, exist_ok=True) + fpath: Path = nested / "file.txt" + fpath.write_bytes(b"ok") + + out: str = canonicalize_path(str(fpath)) + # Should resolve to an absolute path that exists + assert Path(out).is_absolute() + assert Path(out).exists() + + +def test_canonicalize_path_preserves_relative_form(tmp_path: os.PathLike[str], monkeypatch: pytest.MonkeyPatch) -> None: + # Ensure cwd is tmp_path so relative resolution is predictable + monkeypatch.chdir(tmp_path) + rel: str = "a/../b/c.txt\0" + # Create parent only; file may not exist, but normalization should still run + Path("b").mkdir(parents=True, exist_ok=True) + out: str = canonicalize_path(rel) + # Non-strict resolve keeps path normalized; we check it endswith normalized relative + assert out.endswith(str(Path("b") / "c.txt")) + + +def test_win_prefix_longpath_drive_path() -> None: + # Long path over the limit should get the standard long-path prefix + long_tail: str = "a" * (WIN_MAX_FILEPATH_LEN + 5) + p: str = "C:\\" + long_tail + out: str = win_prefix_longpath(p) + assert out.startswith(WIN_LONGPATH_PREFIX) + assert out[len(WIN_LONGPATH_PREFIX) :].startswith("C:\\") + + +def test_win_prefix_longpath_unc_path() -> None: + # UNC path should translate to \\?\UNC + path[1:] + long_tail: str = "share\\" + ("a" * (WIN_MAX_FILEPATH_LEN + 5)) + p: str = "\\\\server\\" + long_tail + out: str = win_prefix_longpath(p) + assert out.startswith(WIN_LONGPATH_PREFIX + 'UNC') + assert out.endswith(long_tail) + + +def test_canonicalize_path_applies_windows_longpath(monkeypatch: pytest.MonkeyPatch) -> None: + # Force Windows branch and ensure prefixing is applied via the helper + monkeypatch.setattr("picard.util.IS_WIN", True, raising=False) + monkeypatch.setattr("picard.util.system_supports_long_paths", lambda: False, raising=False) + + calls: list[str] = [] + + def _spy_prefix(path: str) -> str: + calls.append(path) + return "PREFIXED:" + path + + monkeypatch.setattr("picard.util.win_prefix_longpath", _spy_prefix, raising=False) + + # Any input path is fine; the spy will be invoked unconditionally under IS_WIN and no long paths + raw: str = "C:/dummy" + out: str = canonicalize_path(raw) + assert calls, "win_prefix_longpath was not called" + assert out.startswith("PREFIXED:") + + +@pytest.mark.skipif(not IS_MACOS, reason="macOS-only normalization behavior") +@pytest.mark.parametrize( + ("name"), + [ + "Caf\u00e9", # café + "Mon C\u0153ur", # cœur with ligature + "S’ouvre \u00c0 Ta Voix", # curly apostrophe and À + ], +) +def test_canonicalize_path_componentwise_nfc_nfd(tmp_path: os.PathLike[str], name: str) -> None: + base = Path(tmp_path) + name_nfc = unicodedata.normalize("NFC", name) + name_nfd = unicodedata.normalize("NFD", name) + + # Create using one form, then address using the other + created_dir = base / name_nfd + created_dir.mkdir() + # Determine actual on-disk entry spelling (APFS/HFS+ may adjust) + actual_entry = next(p.name for p in base.iterdir()) + + # Address path using the alternate form + alt_path = str(base / name_nfc / "test.txt") + out: str = canonicalize_path(alt_path) + + # Parent should resolve to the actual entry; file may not exist yet + parent = Path(out).parent + assert parent.exists() + assert parent.name == actual_entry diff --git a/test/test_path_handling.py b/test/test_path_handling.py new file mode 100644 index 0000000000..afc6a8741e --- /dev/null +++ b/test/test_path_handling.py @@ -0,0 +1,328 @@ +# -*- coding: utf-8 -*- +# +# Picard, the next-generation MusicBrainz tagger +# +# Copyright (C) 2025 The MusicBrainz Team +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +""" +Pytest-based tests for filename/path handling across locales and drag-and-drop. + +Covers regressions referenced by: + - PICARD-167: Handle non-UTF-8 locales better + - PICARD-233: Charset of file different than application + +Some scenarios are marked xfail to document and reproduce current bugs. +""" + +from __future__ import annotations + +from dataclasses import dataclass +import os +import os.path as osp +from typing import Any, Iterable +import unicodedata + +from PyQt6 import QtCore + +from picard.const.sys import IS_MACOS +from picard.tagger import Tagger +from picard.util import decode_filename, encode_filename + +import pytest + +from picard.ui.itemviews.basetreeview import BaseTreeView + + +@dataclass +class _Captured: + paths: list[str] | None = None + add_paths_target: Any | None = None + moved_files: list[Any] | None = None + move_to_multi_tracks: bool | None = None + move_files_target: Any | None = None + mbid_lookups: list[tuple[str, bool]] | None = None + + +@dataclass +class _FakeLookup: + captured: _Captured + + def mbid_lookup(self, url_path: str, browser_fallback: bool = False) -> None: + if self.captured.mbid_lookups is None: + self.captured.mbid_lookups = [] + self.captured.mbid_lookups.append((url_path, browser_fallback)) + + +@dataclass +class _FakeTagger: + files: dict[str, Any] + captured: _Captured + + def add_paths(self, paths: Iterable[str], target: Any | None = None) -> None: + if isinstance(paths, list): + self.captured.paths = paths + else: + self.captured.paths = list(paths) + self.captured.add_paths_target = target + + def get_file_lookup(self) -> _FakeLookup: + return _FakeLookup(self.captured) + + def move_files(self, files: list[Any], target: Any | None, move_to_multi_tracks: bool) -> None: + self.captured.moved_files = list(files) + self.captured.move_files_target = target + self.captured.move_to_multi_tracks = move_to_multi_tracks + + +@pytest.fixture +def fake_tagger(monkeypatch: pytest.MonkeyPatch) -> tuple[_FakeTagger, _Captured]: + """Provide a minimal fake tagger returned by QCoreApplication.instance().""" + captured = _Captured() + fake = _FakeTagger(files={}, captured=captured) + monkeypatch.setattr(QtCore.QCoreApplication, "instance", lambda: fake) + return fake, captured + + +@pytest.mark.parametrize( + "name", + [ + "Café.txt", + "Mon Cœur.txt", + "S’ouvre.txt", + "I Belong To You _ Mon Cœur S’ouvre À Ta Voix.txt", + "日本語 🐍.txt", + "emoji_🚀_é_œ_’.txt", + ], +) +@pytest.mark.parametrize("scheme_kind", ["file", "noscheme"], ids=["file", "noscheme"]) +@pytest.mark.skipif( + not osp.supports_unicode_filenames and not IS_MACOS, reason="Unicode filenames not supported on this FS" +) +def test_drop_urls_preserves_unicode_paths( + tmp_path: os.PathLike[str], fake_tagger: tuple[_FakeTagger, _Captured], name: str, scheme_kind: str +) -> None: + fake, captured = fake_tagger + path: str = osp.normpath(osp.join(str(tmp_path), name)) + with open(path, "wb") as f: + f.write(b"test") + + if scheme_kind == "file": + url: QtCore.QUrl = QtCore.QUrl.fromLocalFile(path) + else: + # Construct URL without explicit scheme; BaseTreeView accepts both + url = QtCore.QUrl(path) + + BaseTreeView.drop_urls([url], target=None, move_to_multi_tracks=True) + + assert captured.paths is not None + assert len(captured.paths) == 1 + assert isinstance(captured.paths[0], str) + assert captured.paths[0] == osp.normpath(path) + + +@pytest.mark.parametrize( + "http_url", + [ + "https://musicbrainz.org/recording/1234", + "http://example.com/not-mbid", + ], +) +def test_drop_urls_http_invokes_mbid_lookup(fake_tagger: tuple[_FakeTagger, _Captured], http_url: str) -> None: + _fake, captured = fake_tagger + url: QtCore.QUrl = QtCore.QUrl(http_url) + + BaseTreeView.drop_urls([url], target=None, move_to_multi_tracks=True) + + # For HTTP/HTTPS we must not call add_paths + assert captured.paths is None + # We should have attempted an MBID lookup with the URL path + assert captured.mbid_lookups is not None + assert (url.path(), False) in captured.mbid_lookups + + +def test_drop_urls_moves_existing_files_and_adds_new_paths( + tmp_path: os.PathLike[str], fake_tagger: tuple[_FakeTagger, _Captured] +) -> None: + fake, captured = fake_tagger + # Prepare two paths: one already tracked (should be moved), one new (should be added) + existing_path = osp.join(str(tmp_path), "existing.flac") + new_path = osp.join(str(tmp_path), "new.ogg") + for p in (existing_path, new_path): + with open(p, "wb") as f: + f.write(b"x") + + existing_file_obj = object() + fake.files[osp.normpath(existing_path)] = existing_file_obj + + urls = [ + QtCore.QUrl.fromLocalFile(existing_path), + QtCore.QUrl(new_path), # scheme-less + ] + + BaseTreeView.drop_urls(urls, target="TGT", move_to_multi_tracks=False) + + # move_files called with the existing file object + assert captured.moved_files == [existing_file_obj] + assert captured.move_files_target == "TGT" + assert captured.move_to_multi_tracks is False + + # add_paths called with the new normalized path + assert captured.paths == [osp.normpath(new_path)] + assert captured.add_paths_target == "TGT" + + +def test_drop_urls_trims_trailing_null_and_normalizes( + tmp_path: os.PathLike[str], fake_tagger: tuple[_FakeTagger, _Captured] +) -> None: + # Build a stub mimicking QUrl that returns a path with a trailing NUL + norm_base = osp.normpath(str(tmp_path)) + raw_path = osp.join(norm_base, "dir", "..", "file.txt") + os.makedirs(osp.dirname(osp.normpath(raw_path)), exist_ok=True) + with open(osp.normpath(raw_path), "wb") as f: + f.write(b"ok") + + class _StubUrl: + def __init__(self, p: str) -> None: + self._p = p + + def scheme(self) -> str: + return "file" + + def toLocalFile(self) -> str: + return self._p + "\0" + + def path(self) -> str: + return self._p + + def toString(self, *_args: Any, **_kwargs: Any) -> str: + return self._p + + url = _StubUrl(raw_path) + _fake, captured = fake_tagger + BaseTreeView.drop_urls([url], target=None, move_to_multi_tracks=True) + + # Expect trimmed and normalized path + assert captured.paths is not None + assert len(captured.paths) == 1 + assert captured.paths[0] == osp.normpath(raw_path) + + +def test_drop_urls_ignores_unknown_schemes(fake_tagger: tuple[_FakeTagger, _Captured]) -> None: + _fake, captured = fake_tagger + url = QtCore.QUrl("ftp://example.com/foo.mp3") + BaseTreeView.drop_urls([url], target=None, move_to_multi_tracks=True) + assert captured.paths is None + assert captured.moved_files is None + assert captured.mbid_lookups is None + + +@pytest.mark.parametrize( + ("bad_encoding", "filename"), + [ + ("ISO-8859-1", "Café _ Mon Cœur.txt"), + ("ASCII", "S’ouvre.txt"), + ("euc_jp", "波形 ~ テスト.txt"), + ], +) +@pytest.mark.xfail(reason="encode_filename currently performs lossy replacement under non-UTF-8 locales") +@pytest.mark.skipif(IS_MACOS, reason="macOS path handling will be addressed separately") +def test_encode_filename_should_not_lossily_replace_on_posix( + monkeypatch: pytest.MonkeyPatch, bad_encoding: str, filename: str +) -> None: + """Simulate non-UTF-8 locale and assert we do not corrupt Unicode paths.""" + # Use a POSIX-like absolute path for consistency + unicode_path: str = f"/tmp/{filename}" + + # Force Picard's view of the filesystem encoding + monkeypatch.setattr("picard.util._io_encoding", bad_encoding, raising=False) + + result = encode_filename(unicode_path) + # Desired: keep str unchanged; current behavior often returns bytes or lossy content + assert isinstance(result, str) + assert result == unicode_path + + +@pytest.mark.skipif(not IS_MACOS, reason="macOS-specific normalization behavior") +def test_component_wise_resolution_between_nfc_and_nfd(tmp_path: os.PathLike[str]) -> None: + """Create path using NFD but address it using NFC, expecting resolution. + + This documents the normalization mismatch seen with FUSE/sshfs and Finder. + """ + base: str = str(tmp_path) + name_nfc: str = "Caf\u00e9" + name_nfd: str = unicodedata.normalize("NFD", name_nfc) + assert name_nfc != name_nfd + + nfd_dir: str = osp.join(base, name_nfd) + os.makedirs(nfd_dir, exist_ok=True) + + file_name_nfc: str = "T\u00c0.txt" # "TÀ.txt" + file_path_nfd: str = osp.join(nfd_dir, file_name_nfc) + with open(file_path_nfd, "wb") as f: + f.write(b"ok") + + # Address using NFC form + nfc_dir: str = osp.join(base, name_nfc) + nfc_path: str = osp.join(nfc_dir, file_name_nfc) + + # Future resolver should make this True even if the underlying FS exposes different normalization + assert os.path.exists(nfc_path) + + +# ------------------------ +# POSIX bytes-path testing +# ------------------------ + + +@pytest.mark.skipif(os.name != "posix" or IS_MACOS, reason="bytes-path tests are POSIX-only and skipped on macOS") +def test_bytes_filename_ascii_locale(monkeypatch: pytest.MonkeyPatch, tmp_path: os.PathLike[str]) -> None: + """Create a file using raw bytes in the name and verify behavior under ASCII locale.""" + base_b: bytes = os.fsencode(str(tmp_path)) + # Name includes bytes not representable in ASCII/UTF-8 (0xE9, 0xFF) + name_b: bytes = b"Cafe\xe9_\xff.bin" + path_b: bytes = base_b + b"/" + name_b + + fd: int = os.open(path_b, os.O_CREAT | os.O_WRONLY, 0o644) + os.close(fd) + assert osp.exists(path_b) + + # Simulate ASCII locale in Picard + monkeypatch.setattr("picard.util._io_encoding", "ASCII", raising=False) + + # decode_filename should fail on undecodable bytes + with pytest.raises(UnicodeDecodeError): + decode_filename(name_b) + + # encode_filename should return bytes unchanged if bytes were provided + assert encode_filename(path_b) == path_b + + # os APIs still function with bytes paths + st = os.stat(path_b) + assert st.st_size == 0 + + +@pytest.mark.skipif(os.name != "posix" or IS_MACOS, reason="POSIX-only and skipped on macOS") +def test_scan_recursive_accepts_bytes(tmp_path: os.PathLike[str]) -> None: + """Ensure Tagger._scan_paths_recursive yields bytes paths if provided bytes input.""" + base_b: bytes = os.fsencode(str(tmp_path)) + name_b: bytes = b"\xff\xfe.bad" + path_b: bytes = base_b + b"/" + name_b + fd: int = os.open(path_b, os.O_CREAT | os.O_WRONLY, 0o644) + os.close(fd) + + out = list(Tagger._scan_paths_recursive([path_b], recursive=False, ignore_hidden=False)) + assert path_b in out diff --git a/test/test_resolve_path_components_macos.py b/test/test_resolve_path_components_macos.py new file mode 100644 index 0000000000..fe04d4b881 --- /dev/null +++ b/test/test_resolve_path_components_macos.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +# +# Picard, the next-generation MusicBrainz tagger +# +# Copyright (C) 2025 The MusicBrainz Team +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +""" +Tests for picard.util._resolve_path_components_macos +""" + +from __future__ import annotations + +import os +from pathlib import Path +import unicodedata + +from picard.const.sys import IS_MACOS +from picard.util import _resolve_path_components_macos + +import pytest + + +pytestmark = pytest.mark.skipif(not IS_MACOS, reason="macOS-only tests") + + +@pytest.mark.parametrize( + ("parts"), + [ + ("A",), + ("A", "B"), + ("Caf\u00e9",), + ("Mon C\u0153ur",), + ], +) +def test_resolve_existing_components(tmp_path: os.PathLike[str], parts: tuple[str, ...]) -> None: + base = Path(tmp_path) + current = base + for p in parts: + current = current / p + current.mkdir(parents=True, exist_ok=True) + + out: str = _resolve_path_components_macos(str(current)) + assert Path(out).exists() + assert Path(out).is_dir() + + +@pytest.mark.parametrize( + ("name"), + [ + "Caf\u00e9", + "Mon C\u0153ur", + "S’ouvre \u00c0 Ta Voix", + ], +) +def test_resolve_nfc_nfd_alternates(tmp_path: os.PathLike[str], name: str) -> None: + base = Path(tmp_path) + name_nfc = unicodedata.normalize("NFC", name) + name_nfd = unicodedata.normalize("NFD", name) + + created_dir = base / name_nfd + created_dir.mkdir(parents=True, exist_ok=True) + actual_entry = next(p.name for p in base.iterdir()) + + # Pass the alternate spelling + alt_dir = base / name_nfc / "sub" + out: str = _resolve_path_components_macos(str(alt_dir)) + # Parent should resolve to actual entry, even if final component doesn't exist + parent = Path(out).parent + assert parent.exists() + assert parent.name == actual_entry + + +def test_resolve_preserves_relative_form(tmp_path: os.PathLike[str], monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(tmp_path) + Path("a").mkdir(exist_ok=True) + rel = Path("a") / ".." / "b" / "c" + # Only 'a' exists; 'b' should be left as-is but normalized + out: str = _resolve_path_components_macos(str(rel)) + assert out.endswith(str(Path("b") / "c"))