diff --git a/src/poetry/repositories/http_repository.py b/src/poetry/repositories/http_repository.py index 0af90e4230b..56437a590a6 100644 --- a/src/poetry/repositories/http_repository.py +++ b/src/poetry/repositories/http_repository.py @@ -26,6 +26,7 @@ from poetry.repositories.exceptions import PackageNotFoundError from poetry.repositories.exceptions import RepositoryError from poetry.repositories.link_sources.html import HTMLPage +from poetry.repositories.link_sources.json import SimpleJsonPage from poetry.utils.authenticator import Authenticator from poetry.utils.constants import REQUESTS_TIMEOUT from poetry.utils.helpers import HTTPRangeRequestSupportedError @@ -417,11 +418,13 @@ def calculate_sha256(self, link: Link) -> str | None: return f"{required_hash.name}:{required_hash.hexdigest()}" return None - def _get_response(self, endpoint: str) -> requests.Response | None: + def _get_response( + self, endpoint: str, *, headers: dict[str, str] | None = None + ) -> requests.Response | None: url = self._url + endpoint try: response: requests.Response = self.session.get( - url, raise_for_status=False, timeout=REQUESTS_TIMEOUT + url, raise_for_status=False, timeout=REQUESTS_TIMEOUT, headers=headers ) if response.status_code in (401, 403): self._log( @@ -442,8 +445,25 @@ def _get_response(self, endpoint: str) -> requests.Response | None: ) return response + def _get_prefer_json_header(self) -> dict[str, str]: + # Prefer json, but accept anything for backwards compatibility. + # Although the more specific value should be preferred to the less specific one + # according to https://developer.mozilla.org/en-US/docs/Glossary/Quality_values, + # we add a quality value because some servers still prefer html without one. 
+ return {"Accept": "application/vnd.pypi.simple.v1+json, */*;q=0.1"} + + def _is_json_response(self, response: requests.Response) -> bool: + return ( + response.headers.get("Content-Type", "").split(";")[0].strip() + == "application/vnd.pypi.simple.v1+json" + ) + def _get_page(self, name: NormalizedName) -> LinkSource: - response = self._get_response(f"/{name}/") + response = self._get_response( + f"/{name}/", headers=self._get_prefer_json_header() + ) if not response: raise PackageNotFoundError(f"Package [{name}] not found.") + if self._is_json_response(response): + return SimpleJsonPage(response.url, response.json()) return HTMLPage(response.url, response.text) diff --git a/src/poetry/repositories/legacy_repository.py b/src/poetry/repositories/legacy_repository.py index f3f83bbfc01..04f82d0aa04 100644 --- a/src/poetry/repositories/legacy_repository.py +++ b/src/poetry/repositories/legacy_repository.py @@ -12,8 +12,9 @@ from poetry.inspection.info import PackageInfo from poetry.repositories.exceptions import PackageNotFoundError from poetry.repositories.http_repository import HTTPRepository -from poetry.repositories.link_sources.html import HTMLPage -from poetry.repositories.link_sources.html import SimpleRepositoryRootPage +from poetry.repositories.link_sources.base import SimpleRepositoryRootPage +from poetry.repositories.link_sources.html import SimpleRepositoryHTMLRootPage +from poetry.repositories.link_sources.json import SimpleRepositoryJsonRootPage if TYPE_CHECKING: @@ -130,21 +131,21 @@ def _get_release_info( ), ) - def _get_page(self, name: NormalizedName) -> HTMLPage: - if not (response := self._get_response(f"/{name}/")): - raise PackageNotFoundError(f"Package [{name}] not found.") - return HTMLPage(response.url, response.text) - @cached_property def root_page(self) -> SimpleRepositoryRootPage: - if not (response := self._get_response("/")): + if not ( + response := self._get_response("/", headers=self._get_prefer_json_header()) + ): self._log( 
f"Unable to retrieve package listing from package source {self.name}", level="error", ) return SimpleRepositoryRootPage() - return SimpleRepositoryRootPage(response.text) + if self._is_json_response(response): + return SimpleRepositoryJsonRootPage(response.json()) + + return SimpleRepositoryHTMLRootPage(response.text) def search(self, query: str | list[str]) -> list[Package]: results: list[Package] = [] diff --git a/src/poetry/repositories/link_sources/base.py b/src/poetry/repositories/link_sources/base.py index 10ec64f056c..65c1c2c7dd7 100644 --- a/src/poetry/repositories/link_sources/base.py +++ b/src/poetry/repositories/link_sources/base.py @@ -124,3 +124,24 @@ def yanked(self, name: NormalizedName, version: Version) -> str | bool: @cached_property def _link_cache(self) -> LinkCache: raise NotImplementedError() + + +class SimpleRepositoryRootPage: + """ + This class represents the parsed content of a "simple" repository's root page. + """ + + def search(self, query: str | list[str]) -> list[str]: + results: list[str] = [] + tokens = query if isinstance(query, list) else [query] + + for name in self.package_names: + if any(token in name for token in tokens): + results.append(name) + + return results + + @cached_property + def package_names(self) -> list[str]: + # should be overridden in subclasses + return [] diff --git a/src/poetry/repositories/link_sources/html.py b/src/poetry/repositories/link_sources/html.py index ef13a876395..83adf4944da 100644 --- a/src/poetry/repositories/link_sources/html.py +++ b/src/poetry/repositories/link_sources/html.py @@ -10,6 +10,7 @@ from poetry.core.packages.utils.link import Link from poetry.repositories.link_sources.base import LinkSource +from poetry.repositories.link_sources.base import SimpleRepositoryRootPage from poetry.repositories.parsers.html_page_parser import HTMLPageParser @@ -68,10 +69,11 @@ def _link_cache(self) -> LinkCache: return links -class SimpleRepositoryRootPage: +class 
SimpleRepositoryHTMLRootPage(SimpleRepositoryRootPage): """ - This class represents the parsed content of a "simple" repository's root page. This follows the - specification laid out in PEP 503. + This class represents the parsed content of the HTML version + of a "simple" repository's root page. + This follows the specification laid out in PEP 503. See: https://peps.python.org/pep-0503/ """ @@ -81,17 +83,6 @@ def __init__(self, content: str | None = None) -> None: parser.feed(content or "") self._parsed = parser.anchors - def search(self, query: str | list[str]) -> list[str]: - results: list[str] = [] - tokens = query if isinstance(query, list) else [query] - - for anchor in self._parsed: - href = anchor.get("href") - if href and any(token in href for token in tokens): - results.append(href.rstrip("/")) - - return results - @cached_property def package_names(self) -> list[str]: results: list[str] = [] diff --git a/src/poetry/repositories/link_sources/json.py b/src/poetry/repositories/link_sources/json.py index f33a679ab28..6311453cfb5 100644 --- a/src/poetry/repositories/link_sources/json.py +++ b/src/poetry/repositories/link_sources/json.py @@ -1,5 +1,7 @@ from __future__ import annotations +import urllib.parse + from collections import defaultdict from functools import cached_property from typing import TYPE_CHECKING @@ -8,6 +10,7 @@ from poetry.core.packages.utils.link import Link from poetry.repositories.link_sources.base import LinkSource +from poetry.repositories.link_sources.base import SimpleRepositoryRootPage if TYPE_CHECKING: @@ -25,8 +28,9 @@ def __init__(self, url: str, content: dict[str, Any]) -> None: def _link_cache(self) -> LinkCache: links: LinkCache = defaultdict(lambda: defaultdict(list)) for file in self.content["files"]: - url = file["url"] + url = self.clean_link(urllib.parse.urljoin(self._url, file["url"])) requires_python = file.get("requires-python") + hashes = file.get("hashes", {}) yanked = file.get("yanked", False) # see 
https://peps.python.org/pep-0714/#clients @@ -42,7 +46,11 @@ def _link_cache(self) -> LinkCache: break link = Link( - url, requires_python=requires_python, yanked=yanked, metadata=metadata + url, + requires_python=requires_python, + hashes=hashes, + yanked=yanked, + metadata=metadata, ) if link.ext not in self.SUPPORTED_FORMATS: @@ -53,3 +61,26 @@ def _link_cache(self) -> LinkCache: links[pkg.name][pkg.version].append(link) return links + + +class SimpleRepositoryJsonRootPage(SimpleRepositoryRootPage): + """ + This class represents the parsed content of the JSON version + of a "simple" repository's root page. + This follows the specification laid out in PEP 691. + + See: https://peps.python.org/pep-0691/ + """ + + def __init__(self, content: dict[str, Any]) -> None: + self._content = content + + @cached_property + def package_names(self) -> list[str]: + results: list[str] = [] + + for project in self._content.get("projects", []): + if name := project.get("name"): + results.append(name) + + return results diff --git a/tests/console/commands/test_search.py b/tests/console/commands/test_search.py index 1696d15fec6..5e52817e00b 100644 --- a/tests/console/commands/test_search.py +++ b/tests/console/commands/test_search.py @@ -113,9 +113,10 @@ def test_search_only_legacy_repository( tester.execute("ipython") expected = """\ - Package Version Source Description - ipython 5.7.0 legacy - ipython 7.5.0 legacy + Package Version Source Description + ipython 4.1.0rc1 legacy + ipython 5.7.0 legacy + ipython 7.5.0 legacy """ output = clean_output(tester.io.fetch_output()) @@ -133,11 +134,12 @@ def test_search_multiple_queries( tester.execute("ipython isort") expected = """\ - Package Version Source Description - ipython 5.7.0 legacy - ipython 7.5.0 legacy - isort 4.3.4 legacy - isort-metadata 4.3.4 legacy + Package Version Source Description + ipython 4.1.0rc1 legacy + ipython 5.7.0 legacy + ipython 7.5.0 legacy + isort 4.3.4 legacy + isort-metadata 4.3.4 legacy """ output = 
clean_output(tester.io.fetch_output()) diff --git a/tests/installation/conftest.py b/tests/installation/conftest.py index 316a2756d74..c15e3cb5466 100644 --- a/tests/installation/conftest.py +++ b/tests/installation/conftest.py @@ -21,7 +21,7 @@ def env() -> MockEnv: @pytest.fixture() -def pool(legacy_repository: LegacyRepository) -> RepositoryPool: +def pool(legacy_repository_html: LegacyRepository) -> RepositoryPool: pool = RepositoryPool() pool.add_repository(PyPiRepository(disable_cache=True)) diff --git a/tests/puzzle/test_solver.py b/tests/puzzle/test_solver.py index 961ccfe733a..eb61f28706c 100644 --- a/tests/puzzle/test_solver.py +++ b/tests/puzzle/test_solver.py @@ -56,6 +56,16 @@ ) +@pytest.fixture +def legacy_repository(legacy_repository_html: LegacyRepository) -> LegacyRepository: + """ + Override fixture to only test with the html version of the legacy repository + because the json version has the same packages as the PyPI repository and thus + causes different results in the tests that rely on differences. 
+ """ + return legacy_repository_html + + def set_package_python_versions(provider: Provider, python_versions: str) -> None: provider._package.python_versions = python_versions provider._package_python_constraint = provider._package.python_constraint diff --git a/tests/repositories/fixtures/legacy.py b/tests/repositories/fixtures/legacy.py index 1dc1fb758b3..f35a0eabad9 100644 --- a/tests/repositories/fixtures/legacy.py +++ b/tests/repositories/fixtures/legacy.py @@ -1,9 +1,11 @@ from __future__ import annotations +import json import re from pathlib import Path from typing import TYPE_CHECKING +from typing import Any from urllib.parse import urlparse import pytest @@ -13,14 +15,16 @@ from poetry.repositories.legacy_repository import LegacyRepository from tests.helpers import FIXTURE_PATH_REPOSITORIES_LEGACY +from tests.helpers import FIXTURE_PATH_REPOSITORIES_PYPI if TYPE_CHECKING: from packaging.utils import NormalizedName + from pytest import FixtureRequest from pytest_mock import MockerFixture from requests import PreparedRequest - from poetry.repositories.link_sources.html import HTMLPage + from poetry.repositories.link_sources.base import LinkSource from tests.types import HttpRequestCallback from tests.types import HttpResponse from tests.types import NormalizedNameTransformer @@ -36,6 +40,14 @@ def legacy_repository_directory() -> Path: return FIXTURE_PATH_REPOSITORIES_LEGACY +@pytest.fixture +def legacy_package_json_locations() -> list[Path]: + return [ + FIXTURE_PATH_REPOSITORIES_LEGACY / "json", + FIXTURE_PATH_REPOSITORIES_PYPI / "json", + ] + + @pytest.fixture def legacy_repository_package_names(legacy_repository_directory: Path) -> set[str]: return { @@ -65,6 +77,14 @@ def legacy_repository_index_html( """ +@pytest.fixture +def legacy_repository_index_json( + legacy_repository_directory: Path, legacy_repository_package_names: set[str] +) -> dict[str, Any]: + names = [{"name": name} for name in legacy_repository_package_names] + return {"meta": 
{"api-version": "1.4"}, "projects": names} + + @pytest.fixture def legacy_repository_url() -> str: return "https://legacy.foo.bar" @@ -91,7 +111,32 @@ def html_callback(request: PreparedRequest) -> HttpResponse: @pytest.fixture -def legacy_repository( +def legacy_repository_json_callback( + legacy_package_json_locations: list[Path], + legacy_repository_index_json: dict[str, Any], +) -> HttpRequestCallback: + def json_callback(request: PreparedRequest) -> HttpResponse: + assert request.url + headers = {"Content-Type": "application/vnd.pypi.simple.v1+json"} + if name := Path(urlparse(request.url).path).name: + fixture = Path() + for location in legacy_package_json_locations: + fixture = location / f"{name}.json" + if fixture.exists(): + break + + if not fixture.exists(): + return 404, {}, b"Not Found" + + return 200, headers, fixture.read_bytes() + + return 200, headers, json.dumps(legacy_repository_index_json).encode("utf-8") + + return json_callback + + +@pytest.fixture +def legacy_repository_html( http: responses.RequestsMock, legacy_repository_url: str, legacy_repository_html_callback: HttpRequestCallback, @@ -106,9 +151,30 @@ def legacy_repository( return LegacyRepository("legacy", legacy_repository_url, disable_cache=True) +@pytest.fixture +def legacy_repository_json( + http: responses.RequestsMock, + legacy_repository_url: str, + legacy_repository_json_callback: HttpRequestCallback, + mock_files_python_hosted: None, +) -> LegacyRepository: + http.add_callback( + responses.GET, + re.compile(r"^https://legacy\.(.*)+/?(.*)?$"), + callback=legacy_repository_json_callback, + ) + + return LegacyRepository("legacy", legacy_repository_url, disable_cache=True) + + +@pytest.fixture(params=["legacy_repository_html", "legacy_repository_json"]) +def legacy_repository(request: FixtureRequest) -> LegacyRepository: + return request.getfixturevalue(request.param) # type: ignore[no-any-return] + + @pytest.fixture def specialized_legacy_repository_mocker( - legacy_repository: 
LegacyRepository, + legacy_repository_html: LegacyRepository, legacy_repository_url: str, mocker: MockerFixture, ) -> SpecializedLegacyRepositoryMocker: @@ -127,7 +193,7 @@ def mock( ) original_get_page = specialized_repository._get_page - def _mocked_get_page(name: NormalizedName) -> HTMLPage: + def _mocked_get_page(name: NormalizedName) -> LinkSource: return original_get_page( canonicalize_name(f"{name}{transformer_or_suffix}") if isinstance(transformer_or_suffix, str) diff --git a/tests/repositories/fixtures/legacy/black.html b/tests/repositories/fixtures/legacy/black.html index 092dea09c36..8157a6a5794 100644 --- a/tests/repositories/fixtures/legacy/black.html +++ b/tests/repositories/fixtures/legacy/black.html @@ -5,7 +5,9 @@