diff --git a/pandas-stubs/_libs/arrays.pyi b/pandas-stubs/_libs/arrays.pyi new file mode 100644 index 000000000..59a8da922 --- /dev/null +++ b/pandas-stubs/_libs/arrays.pyi @@ -0,0 +1,38 @@ +from collections.abc import Sequence +from typing import Any + +import numpy as np +from typing_extensions import Self + +from pandas._typing import ( + AnyArrayLikeInt, + AxisInt, + DtypeObj, + Shape, +) + +class NDArrayBacked: + _dtype: DtypeObj + _ndarray: np.ndarray + def __setstate__(self, state: Any) -> None: ... + def __len__(self) -> int: ... + @property + def shape(self) -> Shape: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> int: ... + @property + def nbytes(self) -> int: ... + def copy(self, order=...) -> Self: ... + def delete(self, loc, axis=...) -> Self: ... + def swapaxes(self, axis1, axis2) -> Self: ... + def repeat( + self, + repeats: int | Sequence[int] | AnyArrayLikeInt, + axis: AxisInt | None = None, + ) -> Self: ... + def reshape(self, *args: Any, **kwargs: Any) -> Self: ... + def ravel(self, order=...) -> Self: ... + @property + def T(self) -> Self: ... diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 5ba453ecd..9a9cee1b2 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -224,33 +224,35 @@ Dtype: TypeAlias = ExtensionDtype | NpDtype # m # datetime64 +# Builtin bool type and its string alias +BuiltinBooleanDtypeArg: TypeAlias = type[bool] | Literal["bool"] +# Pandas nullable boolean type and its string alias +PandasBooleanDtypeArg: TypeAlias = pd.BooleanDtype | Literal["boolean"] +# Numpy bool type +# https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bool_ +NumpyBooleanDtypeArg: TypeAlias = type[np.bool_] | Literal["?", "b1", "bool_"] +# PyArrow boolean type and its string alias +PyArrowBooleanDtypeArg: TypeAlias = Literal["bool[pyarrow]", "boolean[pyarrow]"] BooleanDtypeArg: TypeAlias = ( - # Builtin bool type and its string alias - type[bool] # noqa: PYI030 - | Literal["bool"] - # Pandas nullable boolean type and its string alias - | pd.BooleanDtype - | Literal["boolean"] - # Numpy bool type - # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bool_ - | type[np.bool_] - | Literal["?", "b1", "bool_"] - # PyArrow boolean type and its string alias - | Literal["bool[pyarrow]", "boolean[pyarrow]"] + BuiltinBooleanDtypeArg + | PandasBooleanDtypeArg + | NumpyBooleanDtypeArg + | PyArrowBooleanDtypeArg ) -IntDtypeArg: TypeAlias = ( - # Builtin integer type and its string alias - type[int] # noqa: PYI030 - | Literal["int"] - # Pandas nullable integer types and their string aliases - | pd.Int8Dtype +# Builtin integer type and its string alias +BuiltinIntDtypeArg: TypeAlias = type[int] | Literal["int"] +# Pandas nullable integer types and their string aliases +PandasIntDtypeArg: TypeAlias = ( + pd.Int8Dtype | pd.Int16Dtype | pd.Int32Dtype | pd.Int64Dtype | Literal["Int8", "Int16", "Int32", "Int64"] - # Numpy signed integer types and their string aliases +) +# Numpy signed integer types and their string aliases +NumpyIntDtypeArg: TypeAlias = ( # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.byte - | type[np.byte] + type[np.byte] # noqa: PYI030 | Literal["b", "i1", "int8", "byte"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.short | type[np.short] @@ -267,19 +269,26 @@ IntDtypeArg: TypeAlias = ( # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intp | type[np.intp] # signed pointer (=`intptr_t`, platform dependent) | Literal["p", "intp"] - # PyArrow integer types and their string aliases - | Literal["int8[pyarrow]", "int16[pyarrow]", "int32[pyarrow]", "int64[pyarrow]"] ) -UIntDtypeArg: TypeAlias = ( - # Pandas nullable unsigned integer types and their string aliases - pd.UInt8Dtype # noqa: PYI030 +# PyArrow integer types and their string aliases +PyArrowIntDtypeArg: TypeAlias = Literal[ + "int8[pyarrow]", "int16[pyarrow]", "int32[pyarrow]", "int64[pyarrow]" +] +IntDtypeArg: TypeAlias = ( + BuiltinIntDtypeArg | PandasIntDtypeArg | NumpyIntDtypeArg | PyArrowIntDtypeArg +) +# Pandas nullable unsigned integer types and their string aliases +PandasUIntDtypeArg: TypeAlias = ( + pd.UInt8Dtype | pd.UInt16Dtype | pd.UInt32Dtype | pd.UInt64Dtype | Literal["UInt8", "UInt16", "UInt32", "UInt64"] - # Numpy unsigned integer types and their string aliases +) +# Numpy unsigned integer types and their string aliases +NumpyUIntDtypeArg: TypeAlias = ( # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ubyte - | type[np.ubyte] + type[np.ubyte] # noqa: PYI030 | Literal["B", "u1", "uint8", "ubyte"] # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ushort | type[np.ushort] @@ -296,76 +305,78 @@ UIntDtypeArg: TypeAlias = ( # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintp | type[np.uintp] # unsigned pointer (=`uintptr_t`, platform dependent) | Literal["P", "uintp"] - # PyArrow unsigned integer types and their string aliases - | Literal["uint8[pyarrow]", "uint16[pyarrow]", "uint32[pyarrow]", "uint64[pyarrow]"] ) -FloatDtypeArg: TypeAlias = ( - # Builtin float type and its string alias - type[float] # noqa: PYI030 - | Literal["float"] - # Pandas nullable float types and their string aliases - | pd.Float32Dtype - | pd.Float64Dtype - | Literal["Float32", "Float64"] - # Numpy float types and their string aliases +# PyArrow unsigned integer types and their string aliases +PyArrowUIntDtypeArg: TypeAlias = Literal[ + "uint8[pyarrow]", "uint16[pyarrow]", "uint32[pyarrow]", "uint64[pyarrow]" +] +UIntDtypeArg: TypeAlias = PandasUIntDtypeArg | NumpyUIntDtypeArg | PyArrowUIntDtypeArg +# Builtin float type and its string alias +BuiltinFloatDtypeArg: TypeAlias = type[float] | Literal["float"] +# Pandas nullable float types and their string aliases +PandasFloatDtypeArg: TypeAlias = ( + pd.Float32Dtype | pd.Float64Dtype | Literal["Float32", "Float64"] +) +PandasAstypeFloatDtypeArg: TypeAlias = Literal["float_", "longdouble", "longfloat"] +# Numpy float types and their string aliases +NumpyFloatDtypeArg: TypeAlias = ( # NOTE: Alias np.float16 only on Linux x86_64, use np.half instead # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.half - | type[np.half] + type[np.half] # noqa: PYI030 | Literal["e", "f2", " IntervalIndex[IntervalT]: ... @overload def unique(values: PeriodIndex) -> PeriodIndex: ... @overload -# switch to DatetimeIndex after Pandas 3.0 +# TODO: switch to DatetimeIndex after Pandas 3.0 pandas-dev/pandas#57064 def unique(values: DatetimeIndex) -> np_1darray_dt | DatetimeIndex: ... @overload -# switch to TimedeltaIndex after Pandas 3.0 +# TODO: switch to TimedeltaIndex after Pandas 3.0 pandas-dev/pandas#57064 def unique(values: TimedeltaIndex) -> np_1darray_td: ... @overload # switch to Index[int] after Pandas 3.0 @@ -50,7 +50,8 @@ def unique(values: RangeIndex) -> np_1darray_int64: ... @overload def unique(values: MultiIndex) -> np_ndarray: ... @overload -def unique(values: Index) -> np_1darray | Index: ... # switch to Index after Pandas 3.0 +# TODO: switch to Index after Pandas 3.0 pandas-dev/pandas#57064 +def unique(values: Index) -> np_1darray | Index: ... @overload def unique(values: Categorical) -> Categorical: ... diff --git a/pandas-stubs/core/arrays/__init__.pyi b/pandas-stubs/core/arrays/__init__.pyi index b9fb5f749..be097e9ca 100644 --- a/pandas-stubs/core/arrays/__init__.pyi +++ b/pandas-stubs/core/arrays/__init__.pyi @@ -1,15 +1,39 @@ +from pandas.core.arrays.arrow import ArrowExtensionArray from pandas.core.arrays.base import ( - ExtensionArray as ExtensionArray, - ExtensionOpsMixin as ExtensionOpsMixin, - ExtensionScalarOpsMixin as ExtensionScalarOpsMixin, + ExtensionArray, + ExtensionOpsMixin, + ExtensionScalarOpsMixin, ) -from pandas.core.arrays.boolean import BooleanArray as BooleanArray -from pandas.core.arrays.categorical import Categorical as Categorical -from pandas.core.arrays.datetimes import DatetimeArray as DatetimeArray -from pandas.core.arrays.integer import IntegerArray as IntegerArray -from pandas.core.arrays.interval import IntervalArray as IntervalArray -from pandas.core.arrays.numpy_ import PandasArray as PandasArray -from pandas.core.arrays.period import PeriodArray as PeriodArray -from pandas.core.arrays.sparse import SparseArray as SparseArray -from pandas.core.arrays.string_ import StringArray as StringArray -from pandas.core.arrays.timedeltas import TimedeltaArray as TimedeltaArray +from pandas.core.arrays.boolean import BooleanArray +from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays.datetimes import DatetimeArray +from pandas.core.arrays.floating import FloatingArray +from pandas.core.arrays.integer import IntegerArray +from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays.masked import BaseMaskedArray +from pandas.core.arrays.numpy_ import NumpyExtensionArray +from pandas.core.arrays.period import PeriodArray +from pandas.core.arrays.sparse import SparseArray +from pandas.core.arrays.string_ import StringArray +from pandas.core.arrays.string_arrow import ArrowStringArray +from pandas.core.arrays.timedeltas import TimedeltaArray + +__all__ = [ + "ArrowExtensionArray", + "ArrowStringArray", + "BaseMaskedArray", + "BooleanArray", + "Categorical", + "DatetimeArray", + "ExtensionArray", + "ExtensionOpsMixin", + "ExtensionScalarOpsMixin", + "FloatingArray", + "IntegerArray", + "IntervalArray", + "NumpyExtensionArray", + "PeriodArray", + "SparseArray", + "StringArray", + "TimedeltaArray", +] diff --git a/pandas-stubs/core/arrays/_arrow_string_mixins.pyi b/pandas-stubs/core/arrays/_arrow_string_mixins.pyi new file mode 100644 index 000000000..8c2984e50 --- /dev/null +++ b/pandas-stubs/core/arrays/_arrow_string_mixins.pyi @@ -0,0 +1 @@ +class ArrowStringArrayMixin: ... diff --git a/pandas-stubs/core/arrays/_mixins.pyi b/pandas-stubs/core/arrays/_mixins.pyi new file mode 100644 index 000000000..4141b6538 --- /dev/null +++ b/pandas-stubs/core/arrays/_mixins.pyi @@ -0,0 +1,15 @@ +from pandas.core.arrays.base import ExtensionArray +from pandas.core.series import Series +from typing_extensions import Self + +from pandas._libs.arrays import NDArrayBacked +from pandas._typing import ( + AxisInt, + Scalar, +) + +class NDArrayBackedExtensionArray(NDArrayBacked, ExtensionArray): + def argmin(self, axis: AxisInt = 0, skipna: bool = True) -> int: ... + def argmax(self, axis: AxisInt = 0, skipna: bool = True) -> int: ... + def insert(self, loc: int, item: Scalar) -> Self: ... + def value_counts(self, dropna: bool = True) -> Series[int]: ... diff --git a/pandas-stubs/core/arrays/arrow/__init__.pyi b/pandas-stubs/core/arrays/arrow/__init__.pyi index e69de29bb..50274a2de 100644 --- a/pandas-stubs/core/arrays/arrow/__init__.pyi +++ b/pandas-stubs/core/arrays/arrow/__init__.pyi @@ -0,0 +1,7 @@ +from pandas.core.arrays.arrow.accessors import ( + ListAccessor, + StructAccessor, +) +from pandas.core.arrays.arrow.array import ArrowExtensionArray + +__all__ = ["ArrowExtensionArray", "ListAccessor", "StructAccessor"] diff --git a/pandas-stubs/core/arrays/arrow/accessors.pyi b/pandas-stubs/core/arrays/arrow/accessors.pyi new file mode 100644 index 000000000..409d642c7 --- /dev/null +++ b/pandas-stubs/core/arrays/arrow/accessors.pyi @@ -0,0 +1,5 @@ +from abc import ABCMeta + +class ArrowAccessor(metaclass=ABCMeta): ... +class ListAccessor(ArrowAccessor): ... +class StructAccessor(ArrowAccessor): ... diff --git a/pandas-stubs/core/arrays/arrow/array.pyi b/pandas-stubs/core/arrays/arrow/array.pyi new file mode 100644 index 000000000..026fe1955 --- /dev/null +++ b/pandas-stubs/core/arrays/arrow/array.pyi @@ -0,0 +1,7 @@ +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays._arrow_string_mixins import ArrowStringArrayMixin +from pandas.core.arrays.base import ExtensionArraySupportsAnyAll + +class ArrowExtensionArray( + OpsMixin, ExtensionArraySupportsAnyAll, ArrowStringArrayMixin +): ... diff --git a/pandas-stubs/core/arrays/base.pyi b/pandas-stubs/core/arrays/base.pyi index 7ce4f26c6..3ca00baac 100644 --- a/pandas-stubs/core/arrays/base.pyi +++ b/pandas-stubs/core/arrays/base.pyi @@ -87,7 +87,7 @@ class ExtensionArray: ) -> np.intp: ... def factorize(self, use_na_sentinel: bool = True) -> tuple[np_1darray, Self]: ... def repeat( - self, repeats: int | AnyArrayLikeInt | Sequence[int], axis: None = None + self, repeats: int | Sequence[int] | AnyArrayLikeInt, axis: None = None ) -> Self: ... def take( self, @@ -114,6 +114,8 @@ class ExtensionArray: **kwargs: Any, ) -> Self: ... +class ExtensionArraySupportsAnyAll(ExtensionArray): ... + class ExtensionOpsMixin: @classmethod def _add_arithmetic_ops(cls) -> None: ... diff --git a/pandas-stubs/core/arrays/boolean.pyi b/pandas-stubs/core/arrays/boolean.pyi index 478ef8f9a..c3ff2e8e9 100644 --- a/pandas-stubs/core/arrays/boolean.pyi +++ b/pandas-stubs/core/arrays/boolean.pyi @@ -1,9 +1,10 @@ from collections.abc import Sequence -from typing import Any import numpy as np from pandas.core.arrays.integer import IntegerArray from pandas.core.arrays.masked import BaseMaskedArray as BaseMaskedArray +from pandas.core.indexes.base import Index +from pandas.core.series import Series from typing_extensions import Self from pandas._libs.missing import NAType @@ -22,13 +23,21 @@ class BooleanDtype(ExtensionDtype): class BooleanArray(BaseMaskedArray): def __init__( - self, values: np_ndarray_bool, mask: np_ndarray_bool, copy: bool = ... + self, + values: ( + Sequence[bool | np.bool] + | np_ndarray_bool + | Index[bool] + | Series[bool] + | Self + ), + mask: np_ndarray_bool, + copy: bool = False, ) -> None: ... @property - def dtype(self): ... - def __setitem__(self, key, value) -> None: ... - def any(self, *, skipna: bool = ..., **kwargs: Any): ... - def all(self, *, skipna: bool = ..., **kwargs: Any): ... + def dtype( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] + self, + ) -> np.dtypes.BoolDType: ... def __and__( self, other: ( diff --git a/pandas-stubs/core/arrays/datetimes.pyi b/pandas-stubs/core/arrays/datetimes.pyi index 8067b195f..3932eb452 100644 --- a/pandas-stubs/core/arrays/datetimes.pyi +++ b/pandas-stubs/core/arrays/datetimes.pyi @@ -1,4 +1,5 @@ -from datetime import tzinfo as _tzinfo +from collections.abc import Sequence +from datetime import datetime import numpy as np from pandas.core.arrays.datetimelike import ( @@ -6,78 +7,27 @@ from pandas.core.arrays.datetimelike import ( DatetimeLikeArrayMixin, TimelikeOps, ) +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.series import Series +from typing_extensions import Self -from pandas._typing import ( - TimeAmbiguous, - TimeNonexistent, - TimeZones, -) - -from pandas.core.dtypes.dtypes import DatetimeTZDtype as DatetimeTZDtype +from pandas._libs.tslibs.timestamps import Timestamp class DatetimeArray(DatetimeLikeArrayMixin, TimelikeOps, DatelikeOps): __array_priority__: int = ... - def __init__(self, values, dtype=..., freq=..., copy: bool = ...) -> None: ... - # ignore in dtype() is from the pandas source - @property - def dtype(self) -> np.dtype | DatetimeTZDtype: ... # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] - @property - def tz(self): ... - @tz.setter - def tz(self, value) -> None: ... - @property - def tzinfo(self) -> _tzinfo | None: ... - @property - def is_normalized(self): ... - def __iter__(self): ... - def tz_convert(self, tz: TimeZones): ... - def tz_localize( + def __init__( self, - tz: TimeZones, - ambiguous: TimeAmbiguous = "raise", - nonexistent: TimeNonexistent = "raise", - ): ... - def to_pydatetime(self): ... - def normalize(self): ... - def to_period(self, freq=...): ... - def to_perioddelta(self, freq): ... - def month_name(self, locale=...): ... - def day_name(self, locale=...): ... - @property - def time(self): ... - @property - def timetz(self): ... + values: ( + Sequence[datetime | np.datetime64] + | np.typing.NDArray[np.datetime64] + | DatetimeIndex + | Series[Timestamp] + | Self + ), + dtype: np.dtype | None = None, + copy: bool = False, + ) -> None: ... @property - def date(self): ... - year = ... - month = ... - day = ... - hour = ... - minute = ... - second = ... - microsecond = ... - nanosecond = ... - dayofweek = ... - weekday = ... - dayofyear = ... - quarter = ... - days_in_month = ... - daysinmonth = ... - is_month_start = ... - is_month_end = ... - is_quarter_start = ... - is_quarter_end = ... - is_year_start = ... - is_year_end = ... - is_leap_year = ... - def to_julian_date(self): ... - -def objects_to_datetime64ns( - data, - dayfirst, - yearfirst, - utc: bool = ..., - errors: str = ..., - require_iso8601: bool = ..., - allow_object: bool = ..., -): ... + def dtype( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] + self, + ) -> np.dtypes.DateTime64DType: ... diff --git a/pandas-stubs/core/arrays/floating.pyi b/pandas-stubs/core/arrays/floating.pyi index 78d72f875..682756167 100644 --- a/pandas-stubs/core/arrays/floating.pyi +++ b/pandas-stubs/core/arrays/floating.pyi @@ -1,4 +1,8 @@ -from pandas.core.arrays.numeric import NumericDtype +from pandas.core.arrays.numeric import ( + NumericArray, + NumericDtype, +) class Float32Dtype(NumericDtype): ... class Float64Dtype(NumericDtype): ... +class FloatingArray(NumericArray): ... diff --git a/pandas-stubs/core/arrays/integer.pyi b/pandas-stubs/core/arrays/integer.pyi index a32233414..3ed2b5d2f 100644 --- a/pandas-stubs/core/arrays/integer.pyi +++ b/pandas-stubs/core/arrays/integer.pyi @@ -1,3 +1,4 @@ +import numpy as np from pandas.core.arrays.masked import BaseMaskedArray from pandas._libs.missing import NAType @@ -16,8 +17,12 @@ class _IntegerDtype(ExtensionDtype): class IntegerArray(BaseMaskedArray): @property def dtype(self) -> _IntegerDtype: ... - def __init__(self, values, mask, copy: bool = ...) -> None: ... - def __setitem__(self, key, value) -> None: ... + def __init__( + self, + values: np.typing.NDArray[np.integer], + mask: np.typing.NDArray[np.bool], + copy: bool = False, + ) -> None: ... class Int8Dtype(_IntegerDtype): ... class Int16Dtype(_IntegerDtype): ... diff --git a/pandas-stubs/core/arrays/numeric.pyi b/pandas-stubs/core/arrays/numeric.pyi index eacf8825b..5ea899dac 100644 --- a/pandas-stubs/core/arrays/numeric.pyi +++ b/pandas-stubs/core/arrays/numeric.pyi @@ -1,3 +1,11 @@ +from pandas.core.arrays.masked import BaseMaskedArray + +from pandas._libs.properties import cache_readonly + from pandas.core.dtypes.dtypes import BaseMaskedDtype class NumericDtype(BaseMaskedDtype): ... + +class NumericArray(BaseMaskedArray): + @cache_readonly + def dtype(self) -> NumericDtype: ... diff --git a/pandas-stubs/core/arrays/numpy_.pyi b/pandas-stubs/core/arrays/numpy_.pyi index 760d82e84..afe191746 100644 --- a/pandas-stubs/core/arrays/numpy_.pyi +++ b/pandas-stubs/core/arrays/numpy_.pyi @@ -1,16 +1,7 @@ -import numpy as np -from numpy.lib.mixins import NDArrayOperatorsMixin -from pandas.core.arrays.base import ( - ExtensionArray, - ExtensionOpsMixin, -) +from pandas.core.arraylike import OpsMixin +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.strings.object_array import ObjectStringArrayMixin -from pandas.core.dtypes.dtypes import ExtensionDtype - -class PandasDtype(ExtensionDtype): - @property - def numpy_dtype(self) -> np.dtype: ... - @property - def itemsize(self) -> int: ... - -class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin): ... +class NumpyExtensionArray( + OpsMixin, NDArrayBackedExtensionArray, ObjectStringArrayMixin +): ... diff --git a/pandas-stubs/core/arrays/string_.pyi b/pandas-stubs/core/arrays/string_.pyi index 845870de7..556c92436 100644 --- a/pandas-stubs/core/arrays/string_.pyi +++ b/pandas-stubs/core/arrays/string_.pyi @@ -1,6 +1,7 @@ from typing import Literal -from pandas.core.arrays import PandasArray +from pandas.core.arrays.base import ExtensionArray +from pandas.core.arrays.numpy_ import NumpyExtensionArray from pandas._libs.missing import NAType @@ -11,7 +12,9 @@ class StringDtype(ExtensionDtype): @property def na_value(self) -> NAType: ... -class StringArray(PandasArray): +class BaseStringArray(ExtensionArray): ... + +class StringArray(BaseStringArray, NumpyExtensionArray): def __init__(self, values, copy: bool = ...) -> None: ... def __arrow_array__(self, type=...): ... def __setitem__(self, key, value) -> None: ... diff --git a/pandas-stubs/core/arrays/string_arrow.pyi b/pandas-stubs/core/arrays/string_arrow.pyi new file mode 100644 index 000000000..20be1de04 --- /dev/null +++ b/pandas-stubs/core/arrays/string_arrow.pyi @@ -0,0 +1,7 @@ +from pandas.core.arrays.arrow.array import ArrowExtensionArray +from pandas.core.arrays.string_ import BaseStringArray +from pandas.core.strings.object_array import ObjectStringArrayMixin + +class ArrowStringArray( + ObjectStringArrayMixin, ArrowExtensionArray, BaseStringArray +): ... diff --git a/pandas-stubs/core/arrays/timedeltas.pyi b/pandas-stubs/core/arrays/timedeltas.pyi index fd0d71c99..1994bcaca 100644 --- a/pandas-stubs/core/arrays/timedeltas.pyi +++ b/pandas-stubs/core/arrays/timedeltas.pyi @@ -1,64 +1,38 @@ from collections.abc import Sequence from datetime import timedelta +import numpy as np from pandas.core.arrays.datetimelike import ( DatetimeLikeArrayMixin, TimelikeOps, ) +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.series import Series +from typing_extensions import Self + +from pandas._libs.tslibs.timedeltas import Timedelta +from pandas._typing import np_1darray + +from pandas.core.dtypes.base import ExtensionDtype class TimedeltaArray(DatetimeLikeArrayMixin, TimelikeOps): __array_priority__: int = ... @property - def dtype(self): ... - def __init__(self, values, dtype=..., freq=..., copy: bool = ...) -> None: ... - def sum( - self, - *, - axis=..., - dtype=..., - out=..., - keepdims: bool = ..., - initial=..., - skipna: bool = ..., - min_count: int = ..., - ): ... - def std( - self, - *, - axis=..., - dtype=..., - out=..., - ddof: int = ..., - keepdims: bool = ..., - skipna: bool = ..., - ): ... - def median( + def dtype( # type: ignore[override] # pyright: ignore[reportIncompatibleMethodOverride] self, + ) -> np.dtypes.TimeDelta64DType: ... + def __init__(self, values, dtype=..., freq=..., copy: bool = ...) -> None: ... + @classmethod + def _from_sequence( + cls, + data: ( + Sequence[timedelta | np.timedelta64] + | np_1darray[np.timedelta64] + | TimedeltaIndex + | Series[Timedelta] + | Self + ), *, - axis=..., - out=..., - overwrite_input: bool = ..., - keepdims: bool = ..., - skipna: bool = ..., - ): ... - def __mul__(self, other): ... - __rmul__ = ... - def __truediv__(self, other): ... - def __rtruediv__(self, other): ... - def __floordiv__(self, other): ... - def __rfloordiv__(self, other): ... - def __mod__(self, other): ... - def __rmod__(self, other): ... - def __divmod__(self, other): ... - def __rdivmod__(self, other): ... - def __neg__(self): ... - def __pos__(self): ... - def __abs__(self): ... - def total_seconds(self) -> int: ... - def to_pytimedelta(self) -> Sequence[timedelta]: ... - days: int = ... - seconds: int = ... - microseconds: int = ... - nanoseconds: int = ... - @property - def components(self) -> int: ... + dtype: np.dtype | ExtensionDtype | None = None, + copy: bool = True, + ) -> TimedeltaArray: ... diff --git a/pandas-stubs/core/construction.pyi b/pandas-stubs/core/construction.pyi index 58872d197..df7195b58 100644 --- a/pandas-stubs/core/construction.pyi +++ b/pandas-stubs/core/construction.pyi @@ -1,35 +1,193 @@ from collections.abc import Sequence -from typing import overload +from datetime import ( + datetime, + timedelta, +) +from typing import ( + Any, + overload, +) import numpy as np +from pandas.core.arrays.arrow.array import ArrowExtensionArray from pandas.core.arrays.base import ExtensionArray from pandas.core.arrays.boolean import BooleanArray +from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays.datetimes import DatetimeArray +from pandas.core.arrays.floating import FloatingArray from pandas.core.arrays.integer import IntegerArray +from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays.numpy_ import NumpyExtensionArray +from pandas.core.arrays.period import PeriodArray +from pandas.core.arrays.sparse.array import SparseArray +from pandas.core.arrays.string_ import StringArray +from pandas.core.arrays.string_arrow import ArrowStringArray +from pandas.core.arrays.timedeltas import TimedeltaArray +from pandas.core.indexes.base import Index +from pandas.core.indexes.category import CategoricalIndex +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.interval import IntervalIndex +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.range import RangeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.series import Series +from pandas._libs.interval import Interval from pandas._libs.missing import NAType +from pandas._libs.sparse import SparseIndex +from pandas._libs.tslibs.nattype import NaTType +from pandas._libs.tslibs.period import Period +from pandas._libs.tslibs.timedeltas import Timedelta +from pandas._libs.tslibs.timestamps import Timestamp from pandas._typing import ( - BooleanDtypeArg, - IntDtypeArg, - UIntDtypeArg, + BuiltinDtypeArg, + CategoryDtypeArg, + IntervalT, + NumpyNotTimeDtypeArg, + NumpyTimedeltaDtypeArg, + NumpyTimestampDtypeArg, + PandasBooleanDtypeArg, + PandasFloatDtypeArg, + PandasIntDtypeArg, + PandasStrDtypeArg, + PandasTimestampDtypeArg, + PandasUIntDtypeArg, + PyArrowNotStrDtypeArg, + PyArrowStrDtypeArg, + SequenceNotStr, + np_ndarray, + np_ndarray_anyint, + np_ndarray_bool, + np_ndarray_dt, + np_ndarray_float, + np_ndarray_str, + np_ndarray_td, ) -from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + IntervalDtype, + PeriodDtype, +) +# @overload +# def array( +# data: SequenceNotStr[NAType | None], +# dtype: None = None, +# copy: bool = True +# ) -> NumpyExtensionArray: ... +@overload +def array( # type: ignore[overload-overlap] + data: SequenceNotStr[Any] | np_ndarray | ExtensionArray | Index | Series, + dtype: CategoryDtypeArg, + copy: bool = True, +) -> Categorical: ... +@overload +def array( + # TODO: Categorical Series pandas-dev/pandas-stubs#1415 + data: Categorical | CategoricalIndex, + dtype: CategoryDtypeArg | None = None, + copy: bool = True, +) -> Categorical: ... @overload def array( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] - data: Sequence[bool | NAType | None], - dtype: BooleanDtypeArg | None = None, + data: ( + Sequence[Period | NaTType | None] | PeriodArray | PeriodIndex | Series[Period] + ), + dtype: PeriodDtype | None = None, copy: bool = True, -) -> BooleanArray: ... +) -> PeriodArray: ... +@overload +def array( # type: ignore[overload-overlap] + # float("nan") also works, but I don't know how to put it in + data: Sequence[IntervalT | None] | IntervalArray | IntervalIndex | Series[Interval], + dtype: IntervalDtype | None = None, + copy: bool = True, +) -> IntervalArray: ... @overload def array( - data: Sequence[int | NAType | None], - dtype: IntDtypeArg | UIntDtypeArg | None = None, + data: SparseArray | SparseIndex, + dtype: str | np.dtype | ExtensionDtype | None = None, + copy: bool = True, +) -> SparseArray: ... +@overload +def array( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + data: Sequence[bool | np.bool | NAType | None] | np_ndarray_bool | BooleanArray, + dtype: PandasBooleanDtypeArg | None = None, + copy: bool = True, +) -> BooleanArray: ... +@overload +def array( # type: ignore[overload-overlap] + data: Sequence[int | np.integer | NAType | None] | np_ndarray_anyint | IntegerArray, + dtype: PandasIntDtypeArg | PandasUIntDtypeArg | None = None, copy: bool = True, ) -> IntegerArray: ... @overload +def array( # type: ignore[overload-overlap] + data: ( + Sequence[float | np.floating | NAType | None] | np_ndarray_float | FloatingArray + ), + dtype: PandasFloatDtypeArg | None = None, + copy: bool = True, +) -> FloatingArray: ... +@overload +def array( # type: ignore[overload-overlap] + data: ( # TODO: merge the two Sequence's after 3.0 pandas-dev/pandas#57064 + Sequence[datetime | NaTType | None] + | Sequence[np.datetime64 | NaTType | None] + | np_ndarray_dt + | DatetimeArray + | DatetimeIndex + | Series[Timestamp] + ), + dtype: PandasTimestampDtypeArg | NumpyTimestampDtypeArg | None = None, + copy: bool = True, +) -> DatetimeArray: ... +@overload def array( - data: Sequence[object], - dtype: str | np.dtype | ExtensionDtype | None = None, + data: ( + Sequence[timedelta | np.timedelta64 | NaTType | None] + | np_ndarray_td + | TimedeltaArray + | TimedeltaIndex + | Series[Timedelta] + ), + dtype: NumpyTimedeltaDtypeArg | None = None, copy: bool = True, -) -> ExtensionArray: ... +) -> TimedeltaArray: ... +@overload +def array( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + data: SequenceNotStr[str | np.str_ | NAType | None] | np_ndarray_str | StringArray, + dtype: PandasStrDtypeArg | None = None, + copy: bool = True, +) -> StringArray: ... +@overload +def array( # type: ignore[overload-overlap] + data: ( + SequenceNotStr[str | np.str_ | NAType | None] + | np_ndarray_str + | StringArray + | ArrowStringArray + ), + dtype: PyArrowStrDtypeArg | None = None, + copy: bool = True, +) -> ArrowStringArray: ... +@overload +def array( + data: SequenceNotStr[object] | np_ndarray | NumpyExtensionArray | RangeIndex, + dtype: BuiltinDtypeArg | NumpyNotTimeDtypeArg | None = None, + copy: bool = True, +) -> NumpyExtensionArray: ... +@overload +def array( + data: ArrowExtensionArray, + dtype: PyArrowNotStrDtypeArg | None = None, + copy: bool = True, +) -> ArrowExtensionArray: ... + +# @overload +# def array( +# data: SequenceNotStr[Any] | np_ndarray | ExtensionArray | Index | Series, +# dtype: str | np.dtype | ExtensionDtype | None = None, +# copy: bool = True, +# ) -> ExtensionArray: ... diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 654679fbd..2807846be 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -48,7 +48,6 @@ from matplotlib.axes import ( SubplotBase, ) import numpy as np -from numpy import typing as npt from pandas import ( Index, Period, @@ -63,12 +62,15 @@ from pandas.core.api import ( Int64Dtype as Int64Dtype, ) from pandas.core.arrays.base import ExtensionArray -from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.boolean import ( + BooleanDtype, +) from pandas.core.arrays.categorical import ( Categorical, CategoricalAccessor, ) from pandas.core.arrays.datetimes import DatetimeArray +from pandas.core.arrays.floating import FloatingArray from pandas.core.arrays.timedeltas import TimedeltaArray from pandas.core.base import ( ArrayIndexSeriesTimedeltaNoSeq, @@ -190,6 +192,10 @@ from pandas._typing import ( NaPosition, NsmallestNlargestKeep, ObjectDtypeArg, + PandasAstypeComplexDtypeArg, + PandasAstypeFloatDtypeArg, + PandasAstypeTimedeltaDtypeArg, + PandasAstypeTimestampDtypeArg, PeriodFrequency, QuantileInterpolation, RandomState, @@ -343,41 +349,32 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): __index__: ClassVar[None] __hash__: ClassVar[None] # pyright: ignore[reportIncompatibleMethodOverride] - @overload - def __new__( - cls, - data: npt.NDArray[np.float64], - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., - ) -> Series[float]: ... @overload def __new__( cls, data: Sequence[Never], - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., + index: AxesData | None = None, + dtype: None = None, + name: Hashable = None, + copy: bool | None = None, ) -> Series: ... @overload def __new__( cls, data: Sequence[list[_str]], - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., + index: AxesData | None = None, + dtype: None = None, + name: Hashable = None, + copy: bool | None = None, ) -> Series[list[_str]]: ... @overload def __new__( cls, data: Sequence[_str], - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., + index: AxesData | None = None, + dtype: Dtype | None = None, + name: Hashable = None, + copy: bool | None = None, ) -> Series[_str]: ... @overload def __new__( @@ -390,48 +387,48 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): | datetime | date ), - index: AxesData | None = ..., + index: AxesData | None = None, dtype: TimestampDtypeArg = ..., - name: Hashable = ..., - copy: bool = ..., + name: Hashable = None, + copy: bool | None = None, ) -> Series[Timestamp]: ... @overload def __new__( cls, - data: _DataLike, - index: AxesData | None = ..., + data: Sequence[datetime | np.timedelta64] | np_ndarray_dt | DatetimeArray, + index: AxesData | None = None, *, dtype: TimestampDtypeArg, - name: Hashable = ..., - copy: bool = ..., + name: Hashable = None, + copy: bool | None = None, ) -> Series[Timestamp]: ... @overload def __new__( cls, data: _DataLike, - index: AxesData | None = ..., + index: AxesData | None = None, *, dtype: CategoryDtypeArg, - name: Hashable = ..., - copy: bool = ..., + name: Hashable = None, + copy: bool | None = None, ) -> Series[CategoricalDtype]: ... @overload def __new__( cls, data: PeriodIndex | Sequence[Period], - index: AxesData | None = ..., + index: AxesData | None = None, dtype: PeriodDtype = ..., - name: Hashable = ..., - copy: bool = ..., + name: Hashable = None, + copy: bool | None = None, ) -> Series[Period]: ... @overload def __new__( cls, data: Sequence[BaseOffset], - index: AxesData | None = ..., + index: AxesData | None = None, dtype: PeriodDtype = ..., - name: Hashable = ..., - copy: bool = ..., + name: Hashable = None, + copy: bool | None = None, ) -> Series[BaseOffset]: ... @overload def __new__( @@ -443,10 +440,10 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): | np.timedelta64 | timedelta ), - index: AxesData | None = ..., + index: AxesData | None = None, dtype: TimedeltaDtypeArg = ..., - name: Hashable = ..., - copy: bool = ..., + name: Hashable = None, + copy: bool | None = None, ) -> Series[Timedelta]: ... @overload def __new__( @@ -457,56 +454,47 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): | Sequence[Interval[_OrderableT]] | dict[HashableT1, Interval[_OrderableT]] ), - index: AxesData | None = ..., + index: AxesData | None = None, dtype: Literal["Interval"] = ..., - name: Hashable = ..., - copy: bool = ..., + name: Hashable = None, + copy: bool | None = None, ) -> Series[Interval[_OrderableT]]: ... @overload def __new__( # type: ignore[overload-overlap] cls, data: Scalar | _DataLike | dict[HashableT1, Any] | None, - index: AxesData | None = ..., + index: AxesData | None = None, *, dtype: type[S1], - name: Hashable = ..., - copy: bool = ..., + name: Hashable = None, + copy: bool | None = None, ) -> Self: ... @overload - def __new__( # type: ignore[overload-overlap] # pyright: ignore[reportOverlappingOverload] + def __new__( # pyright: ignore[reportOverlappingOverload] cls, - data: Sequence[bool], - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., + data: Sequence[bool | np.bool], + index: AxesData | None = None, + dtype: Dtype | None = None, + name: Hashable = None, + copy: bool | None = None, ) -> Series[bool]: ... @overload - def __new__( # type: ignore[overload-overlap] + def __new__( cls, - data: Sequence[int], - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., + data: Sequence[int | np.integer], + index: AxesData | None = None, + dtype: Dtype | None = None, + name: Hashable = None, + copy: bool | None = None, ) -> Series[int]: ... @overload def __new__( cls, - data: Sequence[float], - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., - ) -> Series[float]: ... - @overload - def __new__( # type: ignore[overload-cannot-match] # pyright: ignore[reportOverlappingOverload] - cls, - data: Sequence[int | float], - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., + data: Sequence[float | np.floating] | np_ndarray_float | FloatingArray, + index: AxesData | None = None, + dtype: FloatDtypeArg | None = None, + name: Hashable = None, + copy: bool | None = None, ) -> Series[float]: ... @overload def __new__( @@ -514,10 +502,10 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): data: ( S1 | _DataLikeS1[S1] | dict[HashableT1, S1] | KeysView[S1] | ValuesView[S1] ), - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., + index: AxesData | None = None, + dtype: Dtype | None = None, + name: Hashable = None, + copy: bool | None = None, ) -> Self: ... @overload def __new__( @@ -530,11 +518,11 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): | NaTType | NAType | None - ) = ..., - index: AxesData | None = ..., - dtype: Dtype = ..., - name: Hashable = ..., - copy: bool = ..., + ) = None, + index: AxesData | None = None, + dtype: Dtype | None = None, + name: Hashable = None, + copy: bool | None = None, ) -> Series: ... @property def hasnans(self) -> bool: ... @@ -1446,28 +1434,28 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame): @overload def astype( self, - dtype: FloatDtypeArg, + dtype: FloatDtypeArg | PandasAstypeFloatDtypeArg, copy: _bool = ..., errors: IgnoreRaise = ..., ) -> Series[float]: ... @overload def astype( self, - dtype: ComplexDtypeArg, + dtype: ComplexDtypeArg | PandasAstypeComplexDtypeArg, copy: _bool = ..., errors: IgnoreRaise = ..., ) -> Series[complex]: ... @overload def astype( self, - dtype: TimedeltaDtypeArg, + dtype: TimedeltaDtypeArg | PandasAstypeTimedeltaDtypeArg, copy: _bool = ..., errors: IgnoreRaise = ..., ) -> Series[Timedelta]: ... @overload def astype( self, - dtype: TimestampDtypeArg, + dtype: TimestampDtypeArg | PandasAstypeTimestampDtypeArg, copy: _bool = ..., errors: IgnoreRaise = ..., ) -> Series[Timestamp]: ... diff --git a/pandas-stubs/core/strings/object_array.pyi b/pandas-stubs/core/strings/object_array.pyi new file mode 100644 index 000000000..3fd853b0f --- /dev/null +++ b/pandas-stubs/core/strings/object_array.pyi @@ -0,0 +1 @@ +class ObjectStringArrayMixin: ... diff --git a/tests/__init__.py b/tests/__init__.py index 701e1757d..63e1a27dd 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +1,9 @@ from __future__ import annotations +from collections.abc import ( + Callable, + Generator, +) from contextlib import ( AbstractContextManager, nullcontext, @@ -9,6 +13,7 @@ import platform from typing import ( TYPE_CHECKING, + Any, Final, Literal, TypeAlias, @@ -28,16 +33,26 @@ from pandas.util.version import Version import pytest +from pandas.core.dtypes.base import ExtensionDtype + if TYPE_CHECKING: from pandas._typing import ( BooleanDtypeArg as BooleanDtypeArg, + BuiltinDtypeArg as BuiltinDtypeArg, BytesDtypeArg as BytesDtypeArg, CategoryDtypeArg as CategoryDtypeArg, ComplexDtypeArg as ComplexDtypeArg, Dtype as Dtype, FloatDtypeArg as FloatDtypeArg, IntDtypeArg as IntDtypeArg, + NumpyNotTimeDtypeArg as NumpyNotTimeDtypeArg, ObjectDtypeArg as ObjectDtypeArg, + PandasAstypeComplexDtypeArg as PandasAstypeComplexDtypeArg, + PandasAstypeFloatDtypeArg as PandasAstypeFloatDtypeArg, + PandasAstypeTimedeltaDtypeArg as PandasAstypeTimedeltaDtypeArg, + PandasAstypeTimestampDtypeArg as PandasAstypeTimestampDtypeArg, + PandasBooleanDtypeArg as PandasBooleanDtypeArg, + PandasFloatDtypeArg as PandasFloatDtypeArg, StrDtypeArg as StrDtypeArg, T as T, TimedeltaDtypeArg as TimedeltaDtypeArg, @@ -67,6 +82,355 @@ np_ndarray_td as np_ndarray_td, ) else: + # Builtin bool type and its string alias + BuiltinBooleanDtypeArg: TypeAlias = type[bool] | Literal["bool"] + # Pandas nullable boolean type and its string alias + PandasBooleanDtypeArg: TypeAlias = pd.BooleanDtype | Literal["boolean"] + # Numpy bool type + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bool_ + NumpyBooleanDtypeArg: TypeAlias = type[np.bool_] | Literal["?", "b1", "bool_"] + # PyArrow boolean type and its string alias + PyArrowBooleanDtypeArg: TypeAlias = Literal["bool[pyarrow]", "boolean[pyarrow]"] + BooleanDtypeArg: TypeAlias = ( + BuiltinBooleanDtypeArg + | PandasBooleanDtypeArg + | NumpyBooleanDtypeArg + | PyArrowBooleanDtypeArg + ) + # Builtin integer type and its string alias + BuiltinIntDtypeArg: TypeAlias = type[int] | Literal["int"] + # Pandas nullable integer types and their string aliases + PandasIntDtypeArg: TypeAlias = ( + pd.Int8Dtype + | pd.Int16Dtype + | pd.Int32Dtype + | pd.Int64Dtype + | Literal["Int8", "Int16", "Int32", "Int64"] + ) + # Numpy signed integer types and their string aliases + NumpyIntDtypeArg: TypeAlias = ( + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.byte + type[np.byte] # noqa: PYI030 + | Literal["b", "i1", "int8", "byte"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.short + | type[np.short] + | Literal["h", "i2", "int16", "short"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intc + | type[np.intc] + | Literal["i", "i4", "int32", "intc"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.int_ + | type[np.int_] + | Literal["l", "i8", "int64", "int_", "long"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.longlong + | type[np.longlong] + | Literal["q", "longlong"] # NOTE: int128 not assigned + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.intp + | type[np.intp] # signed pointer (=`intptr_t`, platform dependent) + | Literal["p", "intp"] + ) + # PyArrow integer types and their string aliases + PyArrowIntDtypeArg: TypeAlias = Literal[ + "int8[pyarrow]", "int16[pyarrow]", "int32[pyarrow]", "int64[pyarrow]" + ] + IntDtypeArg: TypeAlias = ( + BuiltinIntDtypeArg | PandasIntDtypeArg | NumpyIntDtypeArg | PyArrowIntDtypeArg + ) + # Pandas nullable unsigned integer types and their string aliases + PandasUIntDtypeArg: TypeAlias = ( + pd.UInt8Dtype + | pd.UInt16Dtype + | pd.UInt32Dtype + | pd.UInt64Dtype + | Literal["UInt8", "UInt16", "UInt32", "UInt64"] + ) + # Numpy unsigned integer types and their string aliases + NumpyUIntDtypeArg: TypeAlias = ( + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ubyte + type[np.ubyte] # noqa: PYI030 + | Literal["B", "u1", "uint8", "ubyte"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ushort + | type[np.ushort] + | Literal["H", "u2", "uint16", "ushort"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintc + | type[np.uintc] + | Literal["I", "u4", "uint32", "uintc"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uint + | type[np.uint] + | Literal["L", "u8", "uint", "ulong", "uint64"] + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.ulonglong + | type[np.ulonglong] + | Literal["Q", "ulonglong"] # NOTE: uint128 not assigned + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.uintp + | type[np.uintp] # unsigned pointer (=`uintptr_t`, platform dependent) + | Literal["P", "uintp"] + ) + # PyArrow unsigned integer types and their string aliases + PyArrowUIntDtypeArg: TypeAlias = Literal[ + "uint8[pyarrow]", "uint16[pyarrow]", "uint32[pyarrow]", "uint64[pyarrow]" + ] + UIntDtypeArg: TypeAlias = ( + PandasUIntDtypeArg | NumpyUIntDtypeArg | PyArrowUIntDtypeArg + ) + # Builtin float type and its string alias + BuiltinFloatDtypeArg: TypeAlias = type[float] | Literal["float"] + # Pandas nullable float types and their string aliases + PandasFloatDtypeArg: TypeAlias = ( + pd.Float32Dtype | pd.Float64Dtype | Literal["Float32", "Float64"] + ) + PandasAstypeFloatDtypeArg: TypeAlias = Literal["float_", "longdouble", "longfloat"] + # Numpy float types and their string aliases + NumpyFloatDtypeArg: TypeAlias = ( + # NOTE: Alias np.float16 only on Linux x86_64, use np.half instead + # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.half + type[np.half] # noqa: PYI030 + | Literal["e", "f2", "= "2.0.0" +TYPE_FLOAT_ARGS: list[tuple[FloatDtypeArg, type]] = [ + # python float + (float, np.floating), + ("float", np.floating), + # pandas Float32 + (pd.Float32Dtype(), np.float32), + ("Float32", np.float32), + # pandas Float64 + (pd.Float64Dtype(), np.float64), + ("Float64", np.float64), + # numpy float16 + (np.half, np.half), + ("half", np.half), + ("e", np.half), + ("float16", np.float16), + ("f2", np.float16), + # numpy float32 + (np.single, np.single), + ("single", np.single), + ("f", np.single), + ("float32", np.float32), + ("f4", np.float32), + # numpy float64 + (np.double, np.double), + ("double", np.double), + ("d", np.double), + ("float64", np.float64), + ("f8", np.float64), + # numpy float128 + (np.longdouble, np.longdouble), + ("g", np.longdouble), + # pyarrow float32 + ("float32[pyarrow]", float), + ("float[pyarrow]", float), + # pyarrow float64 + ("float64[pyarrow]", float), + ("double[pyarrow]", float), +] +ASTYPE_FLOAT_ARGS: list[tuple[FloatDtypeArg | PandasAstypeFloatDtypeArg, type]] = [ + *TYPE_FLOAT_ARGS, + ("longdouble", np.longdouble), + ("f16", np.longdouble), + # ("float96", np.longdouble), # NOTE: unsupported + ("float128", np.longdouble), # NOTE: UNIX ONLY +] + def check( actual: T, @@ -244,3 +654,32 @@ def pytest_warns_bounded( if upper_exception is None: return nullcontext() return suppress(upper_exception) + + +def skip_platform( + type_error: Callable[[], Any], dtype: type | str | ExtensionDtype +) -> None: + if platform.system() == "Windows" and dtype in {"f16", "float128"}: + with pytest.raises(TypeError): + type_error() + pytest.skip(f"Windows does not support {dtype}") + if ( + platform.system() == "Darwin" + and platform.processor() == "arm" + and dtype in {"f16", "float128"} + ): + with pytest.raises(TypeError): + type_error() + pytest.skip(f"MacOS arm does not support {dtype}") + + +def get_dtype(dtype: object) -> Generator[type | str, None, None]: + """Extract types and string literals from a Union or Literal type.""" + if isinstance(dtype, str): + yield dtype + elif isinstance(dtype, type) and not str(dtype).startswith("type["): + # isinstance(type[bool], type) in py310, but not in newer versions + yield dtype() if "pandas" in str(dtype) else dtype + else: + for arg in get_args(dtype): + yield from get_dtype(arg) diff --git a/tests/arrays/test_arrays.py b/tests/arrays/test_arrays.py deleted file mode 100644 index fd4cca834..000000000 --- a/tests/arrays/test_arrays.py +++ /dev/null @@ -1,12 +0,0 @@ -from pandas.core.arrays.integer import IntegerArray -from pandas.core.construction import array -from typing_extensions import assert_type - -from pandas._libs.missing import NA - -from tests import check - - -def test_construction() -> None: - check(assert_type(array([1]), IntegerArray), IntegerArray) - check(assert_type(array([1, NA]), IntegerArray), IntegerArray) diff --git a/tests/arrays/test_boolean_array.py b/tests/arrays/test_boolean_array.py new file mode 100644 index 000000000..48344f40f --- /dev/null +++ b/tests/arrays/test_boolean_array.py @@ -0,0 +1,29 @@ +import numpy as np +import pandas as pd +from pandas.core.arrays.boolean import BooleanArray +import pytest +from typing_extensions import assert_type + +from tests import ( + PandasBooleanDtypeArg, + check, + get_dtype, +) + + +def test_constructor() -> None: + check(assert_type(pd.array([True]), BooleanArray), BooleanArray) + check(assert_type(pd.array([True, np.bool(True)]), BooleanArray), BooleanArray) + check(assert_type(pd.array([True, None]), BooleanArray), BooleanArray) + check(assert_type(pd.array([True, pd.NA]), BooleanArray), BooleanArray) + + check(assert_type(pd.array(np.array([1], np.bool_)), BooleanArray), BooleanArray) + + check(assert_type(pd.array(pd.array([True])), BooleanArray), BooleanArray) + + pd.array([True], dtype=pd.BooleanDtype()) + + +@pytest.mark.parametrize("dtype", get_dtype(PandasBooleanDtypeArg)) +def test_constructor_dtype(dtype: PandasBooleanDtypeArg): + check(assert_type(pd.array([True], dtype=dtype), BooleanArray), BooleanArray) diff --git a/tests/arrays/test_categorical.py b/tests/arrays/test_categorical.py new file mode 100644 index 000000000..cdfcc15c2 --- /dev/null +++ b/tests/arrays/test_categorical.py @@ -0,0 +1,40 @@ +import numpy as np +import pandas as pd +from pandas.core.arrays.categorical import Categorical +from typing_extensions import assert_type + +from tests import check + + +def test_constructor() -> None: + check(assert_type(pd.array(["🐼"], dtype="category"), Categorical), Categorical) + check( + assert_type(pd.array(np.array(["🐼"]), dtype="category"), Categorical), + Categorical, + ) + check( + assert_type(pd.array(pd.array(["🐼"]), dtype="category"), Categorical), + Categorical, + ) + check( + assert_type(pd.array(pd.Index(["🐼"]), dtype="category"), Categorical), + Categorical, + ) + check( + assert_type(pd.array(pd.Series(["🐼"]), dtype="category"), Categorical), + Categorical, + ) + + check( + assert_type(pd.array(pd.array(["🐼"], dtype="category")), Categorical), + Categorical, + ) + check( + assert_type(pd.array(pd.Index(["🐼"], dtype="category")), Categorical), + Categorical, + ) + # TODO: Categorical Series pandas-dev/pandas-stubs#1415 + # check( + # assert_type(pd.array(pd.Series(["🐼"], dtype="category")), Categorical), + # Categorical, + # ) diff --git a/tests/arrays/test_datetime_array.py b/tests/arrays/test_datetime_array.py new file mode 100644 index 000000000..8f339fcda --- /dev/null +++ b/tests/arrays/test_datetime_array.py @@ -0,0 +1,38 @@ +from datetime import datetime +from typing import cast + +import numpy as np +import pandas as pd +from pandas.core.arrays.datetimes import DatetimeArray +from typing_extensions import assert_type + +from pandas._libs.tslibs.nattype import NaTType + +from tests import check + + +def test_constructor() -> None: + dt = datetime(2025, 11, 10) + check(assert_type(pd.array([dt]), DatetimeArray), DatetimeArray) + check(assert_type(pd.array([dt, pd.Timestamp(dt)]), DatetimeArray), DatetimeArray) + check(assert_type(pd.array([dt, None]), DatetimeArray), DatetimeArray) + check(assert_type(pd.array([dt, pd.NaT, None]), DatetimeArray), DatetimeArray) + + np_dt = np.datetime64(dt) + check(assert_type(pd.array([np_dt]), DatetimeArray), DatetimeArray) + check(assert_type(pd.array([np_dt, None]), DatetimeArray), DatetimeArray) + dt_nat = cast(list[np.datetime64 | NaTType], [np_dt, pd.NaT]) + check(assert_type(pd.array(dt_nat), DatetimeArray), DatetimeArray) + + check( + assert_type( # type: ignore[assert-type] # I do not understand + pd.array(np.array([dt], np.datetime64)), DatetimeArray + ), + DatetimeArray, + ) + + check(assert_type(pd.array(pd.array([dt])), DatetimeArray), DatetimeArray) + + check(assert_type(pd.array(pd.Index([dt])), DatetimeArray), DatetimeArray) + + check(assert_type(pd.array(pd.Series([dt])), DatetimeArray), DatetimeArray) diff --git a/tests/arrays/test_floating_array.py b/tests/arrays/test_floating_array.py new file mode 100644 index 000000000..0ec6ea73a --- /dev/null +++ b/tests/arrays/test_floating_array.py @@ -0,0 +1,35 @@ +import numpy as np +import pandas as pd +from pandas.core.arrays.floating import FloatingArray +import pytest +from typing_extensions import assert_type + +from tests import ( + PandasFloatDtypeArg, + check, + get_dtype, + skip_platform, +) + + +def test_constructor() -> None: + check(assert_type(pd.array([1.0]), FloatingArray), FloatingArray) + check(assert_type(pd.array([1.0, np.float64(1)]), FloatingArray), FloatingArray) + check(assert_type(pd.array([1.0, None]), FloatingArray), FloatingArray) + check(assert_type(pd.array([1.0, pd.NA, None]), FloatingArray), FloatingArray) + + check( + assert_type( # type: ignore[assert-type] # I do not understand + pd.array(np.array([1.0], np.float64)), FloatingArray + ), + FloatingArray, + ) + + check(assert_type(pd.array(pd.array([1.0])), FloatingArray), FloatingArray) + + +@pytest.mark.parametrize("dtype", get_dtype(PandasFloatDtypeArg)) +def test_constructor_dtype(dtype: PandasFloatDtypeArg) -> None: + skip_platform(lambda: pd.array([1.0], dtype=dtype), dtype) + + check(assert_type(pd.array([True], dtype=dtype), FloatingArray), FloatingArray) diff --git a/tests/arrays/test_integer_array.py b/tests/arrays/test_integer_array.py new file mode 100644 index 000000000..a5973eac6 --- /dev/null +++ b/tests/arrays/test_integer_array.py @@ -0,0 +1,22 @@ +import numpy as np +import pandas as pd +from pandas.core.arrays.integer import IntegerArray +from typing_extensions import assert_type + +from tests import check + + +def test_constructor() -> None: + check(assert_type(pd.array([1]), IntegerArray), IntegerArray) + check(assert_type(pd.array([1, np.int64(1)]), IntegerArray), IntegerArray) + check(assert_type(pd.array([1, None]), IntegerArray), IntegerArray) + check(assert_type(pd.array([1, pd.NA, None]), IntegerArray), IntegerArray) + + check( + assert_type( # type: ignore[assert-type] # I do not understand + pd.array(np.array([1], np.int64)), IntegerArray + ), + IntegerArray, + ) + + check(assert_type(pd.array(pd.array([1])), IntegerArray), IntegerArray) diff --git a/tests/arrays/test_interval_array.py b/tests/arrays/test_interval_array.py new file mode 100644 index 000000000..b8ac9e868 --- /dev/null +++ b/tests/arrays/test_interval_array.py @@ -0,0 +1,17 @@ +import pandas as pd +from pandas.core.arrays.interval import IntervalArray +from typing_extensions import assert_type + +from tests import check + + +def test_constructor() -> None: + itv = pd.Interval(0, 1) + check(assert_type(pd.array([itv]), IntervalArray), IntervalArray) + check(assert_type(pd.array([itv, None]), IntervalArray), IntervalArray) + + check(assert_type(pd.array(pd.array([itv])), IntervalArray), IntervalArray) + + check(assert_type(pd.array(pd.Index([itv])), IntervalArray), IntervalArray) + + check(assert_type(pd.array(pd.Series([itv])), IntervalArray), IntervalArray) diff --git a/tests/arrays/test_numpy_extension_array.py b/tests/arrays/test_numpy_extension_array.py new file mode 100644 index 000000000..d8990dd9e --- /dev/null +++ b/tests/arrays/test_numpy_extension_array.py @@ -0,0 +1,55 @@ +import numpy as np +import pandas as pd +from pandas.core.arrays.numpy_ import NumpyExtensionArray +import pytest +from typing_extensions import assert_type + +from tests import ( + BuiltinDtypeArg, + NumpyNotTimeDtypeArg, + check, + get_dtype, + skip_platform, +) + + +def test_constructor() -> None: + # check( + # assert_type(pd.array([pd.NA, None]), NumpyExtensionArray), NumpyExtensionArray + # ) + + check( + assert_type( # type: ignore[assert-type] # I do not understand + pd.array([1, "🐼"]), NumpyExtensionArray + ), + NumpyExtensionArray, + ) + check( + assert_type( # type: ignore[assert-type] # I do not understand + pd.array(np.array([1, "🐼"], np.object_)), NumpyExtensionArray + ), + NumpyExtensionArray, + ) + # check( + # assert_type(pd.array(pd.array([pd.NA, None])), NumpyExtensionArray), + # NumpyExtensionArray, + # ) + check( + assert_type(pd.array(pd.RangeIndex(0, 1)), NumpyExtensionArray), + NumpyExtensionArray, + ) + + +@pytest.mark.parametrize("dtype", get_dtype(BuiltinDtypeArg | NumpyNotTimeDtypeArg)) +def test_constructor_dtype(dtype: BuiltinDtypeArg | NumpyNotTimeDtypeArg): + if dtype == "V" or "void" in str(dtype): + check( + assert_type(pd.array([b"1"], dtype=dtype), NumpyExtensionArray), + NumpyExtensionArray, + ) + else: + skip_platform(lambda: pd.array([1], dtype=dtype), dtype) + check( + assert_type(pd.array([1], dtype=dtype), NumpyExtensionArray), + NumpyExtensionArray, + ) diff --git a/tests/arrays/test_period_array.py b/tests/arrays/test_period_array.py new file mode 100644 index 000000000..a125036b6 --- /dev/null +++ b/tests/arrays/test_period_array.py @@ -0,0 +1,18 @@ +import pandas as pd +from pandas.core.arrays.period import PeriodArray +from typing_extensions import assert_type + +from tests import check + + +def test_constructor() -> None: + prd = pd.Period("2023-01-01") + check(assert_type(pd.array([prd]), PeriodArray), PeriodArray) + check(assert_type(pd.array([prd, None]), PeriodArray), PeriodArray) + check(assert_type(pd.array([prd, pd.NaT, None]), PeriodArray), PeriodArray) + + check(assert_type(pd.array(pd.array([prd])), PeriodArray), PeriodArray) + + check(assert_type(pd.array(pd.Index([prd])), PeriodArray), PeriodArray) + + check(assert_type(pd.array(pd.Series([prd])), PeriodArray), PeriodArray) diff --git a/tests/arrays/test_string_array.py b/tests/arrays/test_string_array.py new file mode 100644 index 000000000..68654b971 --- /dev/null +++ b/tests/arrays/test_string_array.py @@ -0,0 +1,24 @@ +import numpy as np +import pandas as pd +from pandas.core.arrays.string_ import StringArray +from typing_extensions import assert_type + +from tests import ( + TYPE_CHECKING_INVALID_USAGE, + check, +) + + +def test_constructor() -> None: + check(assert_type(pd.array(["🐼"]), StringArray), StringArray) + check( + assert_type(pd.array(["🐼", np.str_("🐼")]), StringArray), + StringArray, + ) + check(assert_type(pd.array(["🐼", None]), StringArray), StringArray) + check(assert_type(pd.array(["🐼", pd.NA, None]), StringArray), StringArray) + + check(assert_type(pd.array(pd.array(["🐼"])), StringArray), StringArray) + + if TYPE_CHECKING_INVALID_USAGE: + pd.array("🐼🎫") # type: ignore[arg-type] # pyright: ignore[reportArgumentType,reportCallIssue] diff --git a/tests/arrays/test_timedelta_array.py b/tests/arrays/test_timedelta_array.py new file mode 100644 index 000000000..70d645eae --- /dev/null +++ b/tests/arrays/test_timedelta_array.py @@ -0,0 +1,33 @@ +from datetime import timedelta + +import numpy as np +import pandas as pd +from pandas.core.arrays.timedeltas import TimedeltaArray +from typing_extensions import assert_type + +from tests import check + + +def test_constructor() -> None: + td = timedelta(2025, 11, 10) + np_dt = np.timedelta64(td) + check(assert_type(pd.array([td]), TimedeltaArray), TimedeltaArray) + check( + assert_type(pd.array([td, pd.Timedelta(td), np_dt]), TimedeltaArray), + TimedeltaArray, + ) + check(assert_type(pd.array([td, None]), TimedeltaArray), TimedeltaArray) + check(assert_type(pd.array([td, pd.NaT, None]), TimedeltaArray), TimedeltaArray) + + check( + assert_type( # type: ignore[assert-type] # I do not understand + pd.array(np.array([td], np.timedelta64)), TimedeltaArray + ), + TimedeltaArray, + ) + + check(assert_type(pd.array(pd.array([td])), TimedeltaArray), TimedeltaArray) + + check(assert_type(pd.array(pd.Index([td])), TimedeltaArray), TimedeltaArray) + + check(assert_type(pd.array(pd.Series([td])), TimedeltaArray), TimedeltaArray) diff --git a/tests/series/test_series.py b/tests/series/test_series.py index 4e34a712e..5d5c96bbc 100644 --- a/tests/series/test_series.py +++ b/tests/series/test_series.py @@ -54,9 +54,13 @@ from pandas.core.dtypes.dtypes import CategoricalDtype # noqa F401 from tests import ( + ASTYPE_FLOAT_ARGS, PD_LTE_23, TYPE_CHECKING_INVALID_USAGE, WINDOWS, + PandasAstypeComplexDtypeArg, + PandasAstypeTimedeltaDtypeArg, + PandasAstypeTimestampDtypeArg, check, ensure_clean, np_1darray, @@ -90,7 +94,6 @@ BytesDtypeArg, CategoryDtypeArg, ComplexDtypeArg, - FloatDtypeArg, IntDtypeArg, ObjectDtypeArg, StrDtypeArg, @@ -2360,50 +2363,10 @@ def test_change_to_dict_return_type() -> None: ("uint64[pyarrow]", int), ] -ASTYPE_FLOAT_ARGS: list[tuple[FloatDtypeArg, type]] = [ - # python float - (float, np.floating), - ("float", np.floating), - # pandas Float32 - (pd.Float32Dtype(), np.float32), - ("Float32", np.float32), - # pandas Float64 - (pd.Float64Dtype(), np.float64), - ("Float64", np.float64), - # numpy float16 - (np.half, np.half), - ("half", np.half), - ("e", np.half), - ("float16", np.float16), - ("f2", np.float16), - # numpy float32 - (np.single, np.single), - ("single", np.single), - ("f", np.single), - ("float32", np.float32), - ("f4", np.float32), - # numpy float64 - (np.double, np.double), - ("double", np.double), - ("d", np.double), - ("float64", np.float64), - ("f8", np.float64), - # numpy float128 - (np.longdouble, np.longdouble), - ("longdouble", np.longdouble), - ("g", np.longdouble), - ("f16", np.longdouble), - # ("float96", np.longdouble), # NOTE: unsupported - ("float128", np.longdouble), # NOTE: UNIX ONLY - # pyarrow float32 - ("float32[pyarrow]", float), - ("float[pyarrow]", float), - # pyarrow float64 - ("float64[pyarrow]", float), - ("double[pyarrow]", float), -] -ASTYPE_COMPLEX_ARGS: list[tuple[ComplexDtypeArg, type]] = [ +ASTYPE_COMPLEX_ARGS: list[ + tuple[ComplexDtypeArg | PandasAstypeComplexDtypeArg, type] +] = [ # python complex (complex, np.complexfloating), ("complex", np.complexfloating), @@ -2429,7 +2392,9 @@ def test_change_to_dict_return_type() -> None: ] -ASTYPE_TIMESTAMP_ARGS: list[tuple[TimestampDtypeArg, type]] = [ +ASTYPE_TIMESTAMP_ARGS: list[ + tuple[TimestampDtypeArg | PandasAstypeTimestampDtypeArg, type] +] = [ # numpy datetime64 ("datetime64[Y]", datetime.datetime), ("datetime64[M]", datetime.datetime), @@ -2486,7 +2451,9 @@ def test_change_to_dict_return_type() -> None: ] -ASTYPE_TIMEDELTA_ARGS: list[tuple[TimedeltaDtypeArg, type]] = [ +ASTYPE_TIMEDELTA_ARGS: list[ + tuple[TimedeltaDtypeArg | PandasAstypeTimedeltaDtypeArg, type] +] = [ # numpy timedelta64 ("timedelta64[Y]", datetime.timedelta), ("timedelta64[M]", datetime.timedelta), @@ -2734,68 +2701,6 @@ def test_astype_uint(cast_arg: IntDtypeArg, target_type: type) -> None: assert_type(s.astype("uint64[pyarrow]"), "pd.Series[int]") -@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_FLOAT_ARGS, ids=repr) -def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: - s = pd.Series([1, 2, 3]) - - if platform.system() == "Windows" and cast_arg in ("f16", "float128"): - with pytest.raises(TypeError): - s.astype(cast_arg) - pytest.skip("Windows does not support float128") - - if ( - platform.system() == "Darwin" - and platform.processor() == "arm" - and cast_arg in ("f16", "float128") - ): - with pytest.raises(TypeError): - s.astype(cast_arg) - pytest.skip("MacOS arm does not support float128") - - check(s.astype(cast_arg), pd.Series, target_type) - - if TYPE_CHECKING: - # python float - assert_type(s.astype(float), "pd.Series[float]") - assert_type(s.astype("float"), "pd.Series[float]") - # pandas Float32 - assert_type(s.astype(pd.Float32Dtype()), "pd.Series[float]") - assert_type(s.astype("Float32"), "pd.Series[float]") - # pandas Float64 - assert_type(s.astype(pd.Float64Dtype()), "pd.Series[float]") - assert_type(s.astype("Float64"), "pd.Series[float]") - # numpy float16 - assert_type(s.astype(np.half), "pd.Series[float]") - assert_type(s.astype("half"), "pd.Series[float]") - assert_type(s.astype("float16"), "pd.Series[float]") - assert_type(s.astype("e"), "pd.Series[float]") - assert_type(s.astype("f2"), "pd.Series[float]") - # numpy float32 - assert_type(s.astype(np.single), "pd.Series[float]") - assert_type(s.astype("single"), "pd.Series[float]") - assert_type(s.astype("float32"), "pd.Series[float]") - assert_type(s.astype("f"), "pd.Series[float]") - assert_type(s.astype("f4"), "pd.Series[float]") - # numpy float64 - assert_type(s.astype(np.double), "pd.Series[float]") - assert_type(s.astype("double"), "pd.Series[float]") - assert_type(s.astype("float64"), "pd.Series[float]") - assert_type(s.astype("d"), "pd.Series[float]") - assert_type(s.astype("f8"), "pd.Series[float]") - # numpy float128 - assert_type(s.astype(np.longdouble), "pd.Series[float]") - assert_type(s.astype("longdouble"), "pd.Series[float]") - assert_type(s.astype("float128"), "pd.Series[float]") - assert_type(s.astype("g"), "pd.Series[float]") - assert_type(s.astype("f16"), "pd.Series[float]") - # pyarrow float32 - assert_type(s.astype("float32[pyarrow]"), "pd.Series[float]") - assert_type(s.astype("float[pyarrow]"), "pd.Series[float]") - # pyarrow float64 - assert_type(s.astype("float64[pyarrow]"), "pd.Series[float]") - assert_type(s.astype("double[pyarrow]"), "pd.Series[float]") - - @pytest.mark.parametrize("cast_arg, target_type", ASTYPE_COMPLEX_ARGS, ids=repr) def test_astype_complex(cast_arg: ComplexDtypeArg, target_type: type) -> None: s = pd.Series([1, 2, 3]) diff --git a/tests/series/test_series_float.py b/tests/series/test_series_float.py new file mode 100644 index 000000000..24395474b --- /dev/null +++ b/tests/series/test_series_float.py @@ -0,0 +1,104 @@ +from typing import TYPE_CHECKING + +import numpy as np +import pandas as pd +import pytest +from typing_extensions import assert_type + +from tests import ( + ASTYPE_FLOAT_ARGS, + TYPE_FLOAT_ARGS, + FloatDtypeArg, + check, + skip_platform, +) + + +def test_constructor() -> None: + check(assert_type(pd.Series([1.0]), "pd.Series[float]"), pd.Series, np.floating) + check( + assert_type(pd.Series([1.0, np.float64(1)]), "pd.Series[float]"), + pd.Series, + np.floating, + ) + check( + assert_type(pd.Series(np.array([1.0], np.float64)), "pd.Series[float]"), + pd.Series, + np.floating, + ) + check( + assert_type(pd.Series(pd.array([1.0])), "pd.Series[float]"), + pd.Series, + np.floating, + ) + check( + assert_type(pd.Series(pd.Index([1.0])), "pd.Series[float]"), + pd.Series, + np.floating, + ) + check( + assert_type(pd.Series(pd.Series([1.0])), "pd.Series[float]"), + pd.Series, + np.floating, + ) + + +@pytest.mark.parametrize(("dtype", "np_dtype"), TYPE_FLOAT_ARGS) +def test_constructor_dtype(dtype: FloatDtypeArg, np_dtype: type) -> None: + skip_platform(lambda: pd.Series([1.0], dtype=dtype), dtype) + + check( + assert_type(pd.Series([1.0], dtype=dtype), "pd.Series[float]"), + pd.Series, + np_dtype, + ) + + +@pytest.mark.parametrize("cast_arg, target_type", ASTYPE_FLOAT_ARGS, ids=repr) +def test_astype_float(cast_arg: FloatDtypeArg, target_type: type) -> None: + s = pd.Series([1, 2, 3]) + + skip_platform(lambda: s.astype(cast_arg), cast_arg) + + check(s.astype(cast_arg), pd.Series, target_type) + + if TYPE_CHECKING: + # python float + assert_type(s.astype(float), "pd.Series[float]") + assert_type(s.astype("float"), "pd.Series[float]") + # pandas Float32 + assert_type(s.astype(pd.Float32Dtype()), "pd.Series[float]") + assert_type(s.astype("Float32"), "pd.Series[float]") + # pandas Float64 + assert_type(s.astype(pd.Float64Dtype()), "pd.Series[float]") + assert_type(s.astype("Float64"), "pd.Series[float]") + # numpy float16 + assert_type(s.astype(np.half), "pd.Series[float]") + assert_type(s.astype("half"), "pd.Series[float]") + assert_type(s.astype("float16"), "pd.Series[float]") + assert_type(s.astype("e"), "pd.Series[float]") + assert_type(s.astype("f2"), "pd.Series[float]") + # numpy float32 + assert_type(s.astype(np.single), "pd.Series[float]") + assert_type(s.astype("single"), "pd.Series[float]") + assert_type(s.astype("float32"), "pd.Series[float]") + assert_type(s.astype("f"), "pd.Series[float]") + assert_type(s.astype("f4"), "pd.Series[float]") + # numpy float64 + assert_type(s.astype(np.double), "pd.Series[float]") + assert_type(s.astype("double"), "pd.Series[float]") + assert_type(s.astype("float64"), "pd.Series[float]") + assert_type(s.astype("d"), "pd.Series[float]") + assert_type(s.astype("f8"), "pd.Series[float]") + # numpy float128 + assert_type(s.astype(np.longdouble), "pd.Series[float]") + assert_type(s.astype("longdouble"), "pd.Series[float]") + assert_type(s.astype("float128"), "pd.Series[float]") + assert_type(s.astype("g"), "pd.Series[float]") + assert_type(s.astype("f16"), "pd.Series[float]") + # pyarrow float32 + assert_type(s.astype("float32[pyarrow]"), "pd.Series[float]") + assert_type(s.astype("float[pyarrow]"), "pd.Series[float]") + # pyarrow float64 + assert_type(s.astype("float64[pyarrow]"), "pd.Series[float]") + assert_type(s.astype("double[pyarrow]"), "pd.Series[float]")