diff --git a/.github/workflows/test_destinations_local.yml b/.github/workflows/test_destinations_local.yml index b29aa195a9..771921b9c0 100644 --- a/.github/workflows/test_destinations_local.yml +++ b/.github/workflows/test_destinations_local.yml @@ -37,6 +37,7 @@ jobs: destinations: "[\"postgres\", \"duckdb\", \"ducklake\", \"dummy\"]" filesystem_drivers: "[\"memory\", \"file\"]" extras: "--group adbc --extra postgres --extra postgis --extra parquet --extra duckdb --extra cli --extra filesystem" + post_install_commands: "uv run dbc install postgresql" needs_postgres: true # Clickhouse OSS (TODO: test with minio s3) @@ -60,17 +61,17 @@ jobs: - name: sqlalchemy destinations: "[\"sqlalchemy\"]" filesystem_drivers: "[\"memory\", \"file\"]" - extras: "--extra sqlalchemy --extra filesystem --extra parquet" + extras: "--extra sqlalchemy --extra filesystem --extra parquet --group adbc" needs_mysql: true - post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==1.4" + post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==1.4 && uv run dbc install mysql && uv run dbc install sqlite" # SQLAlchemy 2.0 (same as above but with sqlalchemy 2.0) - name: sqlalchemy destinations: "[\"sqlalchemy\"]" filesystem_drivers: "[\"memory\", \"file\"]" - extras: "--extra sqlalchemy --extra filesystem --extra parquet" + extras: "--extra sqlalchemy --extra filesystem --extra parquet --group adbc" needs_mysql: true - post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==2.0" + post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==2.0 && uv run dbc install mysql && uv run dbc install sqlite" env: ACTIVE_DESTINATIONS: ${{ matrix.destinations }} diff --git a/.github/workflows/test_destinations_remote.yml b/.github/workflows/test_destinations_remote.yml index 5b803ee69e..633e0ddaae 100644 --- a/.github/workflows/test_destinations_remote.yml +++ b/.github/workflows/test_destinations_remote.yml @@ -112,8 +112,9 @@ jobs: - name: mssql destinations: "[\"mssql\"]" filesystem_drivers: "[\"memory\"]" - extras: "--extra mssql --extra s3 --extra gs --extra az --extra parquet" + extras: "--extra mssql --extra s3 --extra gs --extra az --extra parquet --group adbc" pre_install_commands: "sudo ACCEPT_EULA=Y apt-get install --yes msodbcsql18" + post_install_commands: "uv run dbc install mssql" always_run_all_tests: true # Synapse @@ -133,6 +134,7 @@ jobs: destinations: "[\"postgres\"]" filesystem_drivers: "[\"memory\", \"file\"]" extras: "--group adbc --extra postgres --extra postgis --extra parquet --extra duckdb" + post_install_commands: "uv pip install adbc-driver-postgresql" # use adbc driver installation always_run_all_tests: true # Qdrant (disabled, because we do not have a test account atm, qdrant is tested with local version) diff --git a/dlt/common/configuration/inject.py b/dlt/common/configuration/inject.py index d0889e6884..dbc4aba529 100644 --- a/dlt/common/configuration/inject.py +++ b/dlt/common/configuration/inject.py @@ -4,7 +4,13 @@ from typing import Callable, Dict, Type, Any, Optional, Union, Tuple, TypeVar, overload, cast from inspect import Signature, Parameter, unwrap -from dlt.common.typing import DictStrAny, TFun, AnyFun +from dlt.common.typing import ( + DictStrAny, + TFun, + AnyFun, + get_type_globals, + resolve_single_annotation, +) from dlt.common.configuration.resolve import resolve_configuration, inject_section from dlt.common.configuration.specs.base_configuration import 
BaseConfiguration from dlt.common.configuration.specs.config_section_context import ConfigSectionContext @@ -128,12 +134,13 @@ def decorator(f: TFun) -> TFun: spec_arg: Parameter = None section_name_arg: Parameter = None + globalns = get_type_globals(f) for p in sig.parameters.values(): # for all positional parameters that do not have default value, set default # if hasattr(SPEC, p.name) and p.default == Parameter.empty: # p._default = None # type: ignore - if p.annotation is SPEC: + if resolve_single_annotation(p.annotation, globalns=globalns) is SPEC: # if any argument has type SPEC then us it to take initial value spec_arg = p if p.name == section_arg_name: @@ -209,7 +216,7 @@ def update_bound_args( for p in sig.parameters.values(): if p.name in resolved_params: bound_args.arguments[p.name] = resolved_params.pop(p.name) - if p.annotation is SPEC: + if resolve_single_annotation(p.annotation, globalns=globalns) is SPEC: bound_args.arguments[p.name] = config # pass all other config parameters into kwargs if present if kwargs_arg is not None: diff --git a/dlt/common/configuration/specs/base_configuration.py b/dlt/common/configuration/specs/base_configuration.py index 25125b4a3b..ab825d960f 100644 --- a/dlt/common/configuration/specs/base_configuration.py +++ b/dlt/common/configuration/specs/base_configuration.py @@ -38,6 +38,7 @@ Annotated, Self, extract_inner_type, + get_type_globals, is_annotated, is_any_type, is_final_type, @@ -46,6 +47,7 @@ is_union_type, get_args, get_origin, + resolve_single_annotation, ) from dlt.common.data_types import py_type_to_sc_type from dlt.common.configuration.exceptions import ( @@ -190,6 +192,7 @@ def configspec( def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]: cls.__hint_resolvers__ = {} # type: ignore[attr-defined] is_context = issubclass(cls, _F_ContainerInjectableContext) + # if type does not derive from BaseConfiguration then derive it with contextlib.suppress(NameError): if not issubclass(cls, BaseConfiguration): @@ -211,6 +214,7 @@ def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]: ) setattr(cls, ann, None) # get all attributes without corresponding annotations + globalns = get_type_globals(cls) for att_name, att_value in list(cls.__dict__.items()): # skip callables, dunder names, class variables and some special names if callable(att_value): @@ -233,9 +237,7 @@ def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]: # resolve the annotation as per PEP 563 # NOTE: we do not use get_type_hints because at this moment cls is an unknown name # (ie. used as decorator and module is being imported) - if isinstance(hint, str): - hint = eval(hint) - + hint = resolve_single_annotation(hint, globalns=globalns, raise_on_error=True) # context can have any type if not is_valid_hint(hint) and not is_context: raise ConfigFieldTypeHintNotSupported(att_name, cls, hint) @@ -376,8 +378,9 @@ def _get_resolvable_dataclass_fields(cls) -> Iterator[TDtcField]: @classmethod def get_resolvable_fields(cls) -> Dict[str, type]: """Returns a mapping of fields to their type hints. 
Dunders should not be resolved and are not returned""" + globalns = get_type_globals(cls) return { - f.name: eval(f.type) if isinstance(f.type, str) else f.type + f.name: resolve_single_annotation(f.type, globalns=globalns) for f in cls._get_resolvable_dataclass_fields() } diff --git a/dlt/common/reflection/spec.py b/dlt/common/reflection/spec.py index 301d4f8b75..84b2131ec2 100644 --- a/dlt/common/reflection/spec.py +++ b/dlt/common/reflection/spec.py @@ -11,6 +11,8 @@ TSecretValue, Annotated, SecretSentinel, + get_type_globals, + resolve_single_annotation, ) from dlt.common.configuration import configspec, is_valid_hint, is_secret_hint from dlt.common.configuration.specs import BaseConfiguration @@ -56,6 +58,7 @@ def spec_from_signature( new_fields: Dict[str, Any] = {} sig_base_fields: Dict[str, Any] = {} annotations: Dict[str, Any] = {} + globalns = get_type_globals(f) for p in sig.parameters.values(): # skip *args and **kwargs, skip typical method params @@ -63,7 +66,11 @@ def spec_from_signature( "self", "cls", ]: - field_type = AnyType if p.annotation == Parameter.empty else p.annotation + field_type = ( + AnyType + if p.annotation == Parameter.empty + else resolve_single_annotation(p.annotation, globalns=globalns) + ) # keep the base fields if sig not annotated if ( p.name in base_fields @@ -101,7 +108,10 @@ def spec_from_signature( annotations[p.name] = field_type # set field with default value new_fields[p.name] = p.default - # print(f"Param {p.name} is {field_type}: {p.default} due to {include_defaults} or {type_from_literal}") + # print( + # f"Param {p.name} is {field_type}: {p.default} due to {include_defaults} or" + # f" {type_from_literal}" + # ) signature_fields = {**sig_base_fields, **new_fields} diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index fd5df15f93..0e5303137b 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -696,6 +696,23 @@ def remove_processing_hints(tables: TSchemaTables) -> TSchemaTables: return tables +def has_seen_null_first_hint(column_schema: TColumnSchema) -> bool: + """Checks if `column_schema` has seen seen-null-first hint set to True in the x-normalizer hints.""" + return bool(column_schema.get("x-normalizer", {}).get("seen-null-first")) + + +def remove_seen_null_first_hint(column_schema: TColumnSchema) -> TColumnSchema: + """Removes seen-null-first hint from the x-normalizer hints in `column_schema` in place, + if the x-normalizer section becomes empty after removing the hint, it is also removed, returns the modified input + """ + x_normalizer = column_schema.setdefault("x-normalizer", {}) + if x_normalizer.get("seen-null-first"): + x_normalizer.pop("seen-null-first", None) + if not x_normalizer: + column_schema.pop("x-normalizer", None) + return column_schema + + def get_processing_hints( tables: TSchemaTables, ) -> Tuple[Dict[str, List[str]], Dict[str, Dict[str, List[str]]]]: diff --git a/dlt/common/typing.py b/dlt/common/typing.py index dfd25af3c3..1a2e29b317 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -2,8 +2,9 @@ from datetime import datetime, date # noqa: I251 import inspect import os +import sys from re import Pattern as _REPattern -from types import FunctionType +from types import FunctionType, ModuleType from typing import ( Callable, ClassVar, @@ -535,3 +536,72 @@ def add_value_to_literal(literal: Any, value: Any) -> None: if value not in type_args: type_args += (value,) literal.__args__ = type_args + + +def get_type_globals(obj: Any) -> Dict[str, Any]: + """ + 
Best-effort extraction of globals() associated with a type. If object is passed, + we get its __class__ + + Handles: + - functions + - classes (including TypedDict, dataclasses, Pydantic models, etc.) + - modules (returns their __dict__) + """ + + # 1. Module: just return its dict + if isinstance(obj, ModuleType): + return obj.__dict__ + + # 2. Function or bound/unbound method + if inspect.isfunction(obj): + return obj.__globals__ + + # 3. Class (includes TypedDict, dataclasses, normal classes, etc.) + if not inspect.isclass(obj): + obj = obj.__class__ + + if mod := sys.modules.get(obj.__module__): + return mod.__dict__ + return {} + + +def resolve_single_annotation( + ann: Any, + *, + globalns: Optional[Dict[str, Any]] = None, + localns: Optional[Dict[str, Any]] = None, + raise_on_error: bool = False, +) -> Any: + """ + Resolves annotation `ann` if it is a str and/or ForwardRef. + - If `ann` is not a str or ForwardRef, it's returned unchanged. + - If it *is* a str/ForwardRef, we eval it in an appropriate namespace. + """ + + # fast path: already a real type + if not isinstance(ann, (str, ForwardRef)): + return ann + + # extract the expression and module from ForwardRef if needed + expr: str + if isinstance(ann, ForwardRef): + expr = ann.__forward_arg__ + if ( + module := sys.modules.get(getattr(ann, "__forward_module__", None)) + ) and globalns is None: + globalns = module.__dict__ + else: + expr = ann + + try: + ann = eval(expr, globalns, localns) + if isinstance(ann, ForwardRef): + ann = resolve_single_annotation( + ann, globalns=globalns, localns=localns, raise_on_error=raise_on_error + ) + except Exception: + if raise_on_error: + raise + + return ann diff --git a/dlt/common/validation.py b/dlt/common/validation.py index 46f68c5a9b..8b71ba9196 100644 --- a/dlt/common/validation.py +++ b/dlt/common/validation.py @@ -1,3 +1,4 @@ +import sys import functools import inspect from typing import Callable, Any, List, Type @@ -61,7 +62,10 @@ def validate_dict( # TODO: get_type_hints is very slow and we possibly should cache the result # even better option is to rewrite "verify_prop" so we can cache annotations mapper to validators # so we do not check the types of typeddict keys all the time - allowed_props = get_type_hints(spec) + allowed_props = get_type_hints( + spec, + globalns=sys.modules[spec.__module__].__dict__ if spec.__module__ in sys.modules else None, + ) required_props = {k: v for k, v in allowed_props.items() if not is_optional_type(v)} # remove optional props props = {k: v for k, v in doc.items() if filter_f(k)} diff --git a/dlt/destinations/_adbc_jobs.py b/dlt/destinations/_adbc_jobs.py new file mode 100644 index 0000000000..3cf9c5a4f7 --- /dev/null +++ b/dlt/destinations/_adbc_jobs.py @@ -0,0 +1,145 @@ +from __future__ import annotations + +import functools +from abc import ABC, abstractmethod +from typing import Iterator, TYPE_CHECKING, Sequence, Tuple + +from dlt.common import logger +from dlt.common.configuration.inject import with_config +from dlt.common.destination.capabilities import LoaderFileFormatSelector +from dlt.common.schema.typing import TTableSchema +from dlt.common.typing import TLoaderFileFormat +from dlt.common.utils import without_none +from dlt.destinations.job_client_impl import SqlJobClientBase + +if TYPE_CHECKING: + from adbc_driver_manager.dbapi import Connection + +from dlt.common.destination.client import HasFollowupJobs, RunnableLoadJob + + +# TODO: driver presence detection, driver location detection to support (see postgres factory) +# dbc and pip 
install drivers, connection string conversion etc. should be extracted to ADBC +# lib helper (like we do with sqlalchemy or arrow) + + +@with_config +def has_adbc_driver(driver: str, disable_adbc_detection: bool = False) -> Tuple[bool, str]: + """Figures out if given driver is available without actually connecting to destination. + Allows to disable via `disable_adbc_detection` setting in dlt config + """ + if disable_adbc_detection: + return False, None + try: + import adbc_driver_manager as dm + + try: + db = dm.AdbcDatabase(driver=driver, uri="") + db.close() + return True, None + except dm.Error as pex: + # NOT_FOUND returned when driver library can't be found + if pex.status_code in (dm.AdbcStatusCode.NOT_FOUND, dm.AdbcStatusCode.NOT_IMPLEMENTED): + return False, str(pex) + return True, str(pex) + except ImportError as import_ex: + return False, str(import_ex) + + +class AdbcParquetCopyJob(RunnableLoadJob, HasFollowupJobs, ABC): + def __init__(self, file_path: str) -> None: + super().__init__(file_path) + self._job_client: SqlJobClientBase = None + # override default schema handling + self._connect_catalog_name: str = None + self._connect_schema_name: str = None + + @abstractmethod + def _connect(self) -> Connection: + pass + + def _set_catalog_and_schema(self) -> Tuple[str, str]: + catalog_name = self._connect_catalog_name + if catalog_name is None: + catalog_name = self._job_client.sql_client.catalog_name(quote=False) + elif catalog_name == "": + # empty string disables catalog + catalog_name = None + + schema_name = self._connect_schema_name + if schema_name is None: + schema_name = self._job_client.sql_client.escape_column_name( + self._job_client.sql_client.dataset_name, quote=False, casefold=True + ) + elif schema_name == "": + # empty string disables schema + schema_name = None + + return catalog_name, schema_name + + def run(self) -> None: + from dlt.common.libs.pyarrow import pq_stream_with_new_columns + from dlt.common.libs.pyarrow import pyarrow + + def _iter_batches(file_path: str) -> Iterator[pyarrow.RecordBatch]: + for table in pq_stream_with_new_columns(file_path, ()): + yield from table.to_batches() + + with self._connect() as conn, conn.cursor() as cur: + import time + + catalog_name, schema_name = self._set_catalog_and_schema() + kwargs = dict(catalog_name=catalog_name, db_schema_name=schema_name) + + t_ = time.time() + rows = cur.adbc_ingest( + self.load_table_name, + _iter_batches(self._file_path), + mode="append", + **without_none(kwargs), # type: ignore[arg-type,unused-ignore] + ) + conn.commit() + logger.info( + f"{rows} rows copied from {self._file_name} to" + f" {self.load_table_name}.{schema_name} in {time.time()-t_} s" + ) + + +def _loader_file_format_selector( + driver: str, + docs_url: str, + prefer_parquet: bool, + preferred_loader_file_format: TLoaderFileFormat, + supported_loader_file_formats: Sequence[TLoaderFileFormat], + /, + *, + table_schema: TTableSchema, +) -> Tuple[TLoaderFileFormat, Sequence[TLoaderFileFormat]]: + found, err_str = has_adbc_driver(driver) + if not found: + supported_loader_file_formats = list(supported_loader_file_formats) + supported_loader_file_formats.remove("parquet") + + if table_schema.get("file_format") == "parquet": + logger.warning( + f"parquet file format was requested for table {table_schema['name']} but ADBC" + f" driver for {driver} was not installed:\n {err_str}\n" + " Read more: " + + docs_url + ) + else: + if prefer_parquet: + # parquet is preferred format if driver is enabled + preferred_loader_file_format = 
"parquet" + + return (preferred_loader_file_format, supported_loader_file_formats) + + +def make_adbc_parquet_file_format_selector( + driver: str, + docs_url: str, + prefer_parquet: bool, +) -> LoaderFileFormatSelector: + """Factory for file format selector that removes parquet from the list if `driver` not installed""" + + return functools.partial(_loader_file_format_selector, driver, docs_url, prefer_parquet) # type: ignore[return-value] diff --git a/dlt/destinations/impl/mssql/configuration.py b/dlt/destinations/impl/mssql/configuration.py index 1332563974..b17b3e5b3d 100644 --- a/dlt/destinations/impl/mssql/configuration.py +++ b/dlt/destinations/impl/mssql/configuration.py @@ -71,7 +71,7 @@ def _get_driver(self) -> str: f" how to install the `{self.SUPPORTED_DRIVERS[0]}` on your platform." ) - def _get_odbc_dsn_dict(self) -> Dict[str, Any]: + def get_odbc_dsn_dict(self) -> Dict[str, Any]: params = { "DRIVER": self.driver, "SERVER": f"{self.host},{self.port}", @@ -84,7 +84,7 @@ def _get_odbc_dsn_dict(self) -> Dict[str, Any]: return params def to_odbc_dsn(self) -> str: - params = self._get_odbc_dsn_dict() + params = self.get_odbc_dsn_dict() return ";".join([f"{k}={v}" for k, v in params.items()]) diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py index 5af4cf5ce3..d54360af21 100644 --- a/dlt/destinations/impl/mssql/factory.py +++ b/dlt/destinations/impl/mssql/factory.py @@ -9,6 +9,7 @@ from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.schema.typing import TColumnSchema, TColumnType +from dlt.destinations._adbc_jobs import make_adbc_parquet_file_format_selector from dlt.destinations.type_mapping import TypeMapperImpl from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, MsSqlClientConfiguration @@ -117,7 +118,12 @@ class mssql(Destination[MsSqlClientConfiguration, "MsSqlJobClient"]): def _raw_capabilities(self) -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() caps.preferred_loader_file_format = "insert_values" - caps.supported_loader_file_formats = ["insert_values", "model"] + caps.supported_loader_file_formats = ["insert_values", "parquet", "model"] + caps.loader_file_format_selector = make_adbc_parquet_file_format_selector( + "mssql", + "https://dlthub.com/docs/dlt-ecosystem/destinations/mssql#data-loading", + prefer_parquet=True, + ) caps.preferred_staging_file_format = None caps.supported_staging_file_formats = [] caps.type_mapper = MsSqlTypeMapper diff --git a/dlt/destinations/impl/mssql/mssql.py b/dlt/destinations/impl/mssql/mssql.py index 470c7b2812..0c75c62b4c 100644 --- a/dlt/destinations/impl/mssql/mssql.py +++ b/dlt/destinations/impl/mssql/mssql.py @@ -1,13 +1,19 @@ -from typing import Dict, Optional, Sequence, List, Any +from typing import TYPE_CHECKING, Dict, Iterator, Optional, Sequence, List, Any +from dlt.common import logger from dlt.common.destination.client import ( FollowupJobRequest, + HasFollowupJobs, + LoadJob, PreparedTableSchema, + RunnableLoadJob, ) from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.schema import TColumnSchema, TColumnHint, Schema from dlt.common.schema.typing import TColumnType +from dlt.common.storages.load_package import ParsedLoadJobFileName +from dlt.destinations._adbc_jobs import AdbcParquetCopyJob from dlt.destinations.sql_jobs import SqlStagingReplaceFollowupJob, SqlMergeFollowupJob from dlt.destinations.insert_job_client import InsertValuesJobClient @@ -76,6 +82,69 @@ def 
_new_temp_table_name(cls, table_name: str, op: str, sql_client: SqlClientBas return SqlMergeFollowupJob._new_temp_table_name("#" + table_name, op, sql_client) +class MssqlParquetCopyJob(AdbcParquetCopyJob): + _config: MsSqlClientConfiguration + + if TYPE_CHECKING: + from adbc_driver_manager.dbapi import Connection + + def _connect(self) -> "Connection": + from adbc_driver_manager import dbapi + + self._config = self._job_client.config # type: ignore[assignment] + conn_dsn = self.odbc_to_go_mssql_dsn(self._config.credentials.get_odbc_dsn_dict()) + conn_str = ";".join([f"{k}={v}" for k, v in conn_dsn.items()]) + logger.info(f"ADBC connect to {conn_str}") + return dbapi.connect(driver="mssql", db_kwargs={"uri": conn_str}) + + @staticmethod + def odbc_to_go_mssql_dsn(dsn: Dict[str, Any]) -> Dict[str, Any]: + """Converts odbc connection string to go connection string used by ADBC""" + # DSN keys are already normalized to upper case + result: Dict[str, Any] = {} + + for upper, value in dsn.items(): + if value is None: + continue + + v = str(value) + + if upper == "ENCRYPT": + v = v.strip().lower() + + # ODBC: yes/mandatory/true/1 → go-mssqldb: true (TLS on) + if v in {"yes", "true", "1", "mandatory"}: + v = "true" + + # ODBC: strict → go-mssqldb strict (if supported by the driver) + elif v in {"strict"}: + v = "strict" + + # ODBC: optional → go-mssqldb optional (login only) + elif v in {"optional"}: + v = "optional" + + # ODBC: no/false/0/disabled → go-mssqldb disable (no TLS at all) + # This mirrors your previous string hack: + # .replace("=yes", "=1").replace("=no", "=disable") + elif v in {"no", "false", "0", "disabled", "disable"}: + v = "disable" + + elif upper == "TRUSTSERVERCERTIFICATE": + v = v.strip().lower() + + # ODBC uses yes/no; go-mssqldb expects true/false (but is lenient); + # we normalize explicitly. 
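+                # e.g. an ODBC DSN entry of TRUSTSERVERCERTIFICATE=yes ends up as TRUSTSERVERCERTIFICATE=true
+                # in the go-mssqldb style connection string (illustrative value)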
+ if v in {"yes", "true", "1"}: + v = "true" + elif v in {"no", "false", "0"}: + v = "false" + + result[upper] = v + + return result + + class MsSqlJobClient(InsertValuesJobClient): def __init__( self, @@ -98,6 +167,16 @@ def __init__( self.active_hints = HINT_TO_MSSQL_ATTR if self.config.create_indexes else {} self.type_mapper = capabilities.get_type_mapper() + def create_load_job( + self, table: PreparedTableSchema, file_path: str, load_id: str, restore: bool = False + ) -> LoadJob: + job = super().create_load_job(table, file_path, load_id, restore) + if not job: + parsed_file = ParsedLoadJobFileName.parse(file_path) + if parsed_file.file_format == "parquet": + job = MssqlParquetCopyJob(file_path) + return job + def _create_merge_followup_jobs( self, table_chain: Sequence[PreparedTableSchema] ) -> List[FollowupJobRequest]: diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py index aeea5de763..e2a6c7352a 100644 --- a/dlt/destinations/impl/postgres/factory.py +++ b/dlt/destinations/impl/postgres/factory.py @@ -10,6 +10,7 @@ from dlt.common.schema.typing import TColumnSchema, TColumnType, TTableSchema from dlt.common.typing import TLoaderFileFormat from dlt.common.wei import EVM_DECIMAL_PRECISION +from dlt.destinations._adbc_jobs import make_adbc_parquet_file_format_selector from dlt.destinations.impl.postgres.configuration import ( PostgresCredentials, PostgresClientConfiguration, @@ -137,6 +138,11 @@ def postgres_loader_file_format_selector( ) -> Tuple[TLoaderFileFormat, Sequence[TLoaderFileFormat]]: try: # supports adbc for direct parquet loading + # from adbc_driver_manager import _dbapi_backend, dbapi + + # _dbapi_backend._ALL_BACKENDS + # dbapi.connect() + import adbc_driver_postgresql.dbapi except ImportError: supported_loader_file_formats = list(supported_loader_file_formats) @@ -153,6 +159,20 @@ def postgres_loader_file_format_selector( return (preferred_loader_file_format, supported_loader_file_formats) +def get_adbc_driver_location() -> str: + """Detects driver location if PyPI driver package is installed, otherwise falls back to dbc + driver name. 
+ """ + # TODO: move to the (future) ADBC helper module + try: + from adbc_driver_postgresql import _driver_path + + # use driver from installed dependency + return _driver_path() # type: ignore[no-any-return] + except Exception: + return "postgresql" + + class postgres(Destination[PostgresClientConfiguration, "PostgresClient"]): spec = PostgresClientConfiguration @@ -161,7 +181,11 @@ def _raw_capabilities(self) -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() caps.preferred_loader_file_format = "insert_values" caps.supported_loader_file_formats = ["insert_values", "csv", "parquet", "model"] - caps.loader_file_format_selector = postgres_loader_file_format_selector + caps.loader_file_format_selector = make_adbc_parquet_file_format_selector( + get_adbc_driver_location(), + "https://dlthub.com/docs/dlt-ecosystem/destinations/postgres#fast-loading-with-arrow-tables-and-parquet", + prefer_parquet=False, + ) caps.preferred_staging_file_format = None caps.supported_staging_file_formats = [] caps.type_mapper = PostgresTypeMapper diff --git a/dlt/destinations/impl/postgres/postgres.py b/dlt/destinations/impl/postgres/postgres.py index acdff426e4..e4207a5632 100644 --- a/dlt/destinations/impl/postgres/postgres.py +++ b/dlt/destinations/impl/postgres/postgres.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterator, Optional, Sequence, List, Any +from typing import TYPE_CHECKING, Dict, Iterator, Optional, Sequence, List, Any from dlt.common import logger from dlt.common.destination.configuration import CsvFormatConfiguration @@ -18,7 +18,9 @@ from dlt.common.schema.utils import is_nullable_column from dlt.common.storages.file_storage import FileStorage from dlt.common.storages.load_storage import ParsedLoadJobFileName +from dlt.destinations._adbc_jobs import AdbcParquetCopyJob from dlt.destinations.impl.postgres.configuration import PostgresClientConfiguration +from dlt.destinations.impl.postgres.factory import get_adbc_driver_location from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient from dlt.destinations.insert_job_client import InsertValuesJobClient from dlt.destinations.sql_client import SqlClientBase @@ -52,36 +54,18 @@ def generate_sql( return sql -class PostgresParquetCopyJob(RunnableLoadJob, HasFollowupJobs): - def __init__(self, file_path: str) -> None: - super().__init__(file_path) - self._job_client: PostgresClient = None +class PostgresParquetCopyJob(AdbcParquetCopyJob): + if TYPE_CHECKING: + from adbc_driver_manager.dbapi import Connection - def run(self) -> None: - self._config = self._job_client.config + def _connect(self) -> "Connection": + from adbc_driver_manager import dbapi - from dlt.common.libs.pyarrow import pq_stream_with_new_columns - from dlt.common.libs.pyarrow import pyarrow - import adbc_driver_postgresql.dbapi as adbapi - - def _iter_batches(file_path: str) -> Iterator[pyarrow.RecordBatch]: - for table in pq_stream_with_new_columns(file_path, ()): - yield from table.to_batches() - - with ( - adbapi.connect(self._config.credentials.to_native_representation()) as conn, - conn.cursor() as cur, - ): - rows = cur.adbc_ingest( - self.load_table_name, - _iter_batches(self._file_path), - mode="append", - db_schema_name=self._job_client.sql_client.fully_qualified_dataset_name( - quote=False - ), - ) - logger.info(f"{rows} rows copied from {self._file_name} to {self.load_table_name}") - conn.commit() + self._config = self._job_client.config + return dbapi.connect( + driver=get_adbc_driver_location(), + db_kwargs={"uri": 
self._config.credentials.to_native_representation()}, + ) class PostgresCsvCopyJob(RunnableLoadJob, HasFollowupJobs): diff --git a/dlt/destinations/impl/sqlalchemy/db_api_client.py b/dlt/destinations/impl/sqlalchemy/db_api_client.py index dfba7205e9..8a27f3b7fa 100644 --- a/dlt/destinations/impl/sqlalchemy/db_api_client.py +++ b/dlt/destinations/impl/sqlalchemy/db_api_client.py @@ -216,6 +216,7 @@ def _sqlite_create_dataset(self, dataset_name: str) -> None: self.execute_sql(statement, fn=new_db_fn, name=dataset_name) # WAL mode is applied to all currently attached databases self.execute_sql("PRAGMA journal_mode=WAL") + self.execute_sql("PRAGMA synchronous=NORMAL;") self._sqlite_attached_datasets.add(dataset_name) def _sqlite_drop_dataset(self, dataset_name: str) -> None: diff --git a/dlt/destinations/impl/sqlalchemy/load_jobs.py b/dlt/destinations/impl/sqlalchemy/load_jobs.py index bc54280e34..16fcc55816 100644 --- a/dlt/destinations/impl/sqlalchemy/load_jobs.py +++ b/dlt/destinations/impl/sqlalchemy/load_jobs.py @@ -1,8 +1,11 @@ +from __future__ import annotations + from typing import IO, Any, Dict, Iterator, List, Sequence, TYPE_CHECKING, Optional import math import sqlalchemy as sa +from dlt.common import logger from dlt.common.destination.client import ( RunnableLoadJob, HasFollowupJobs, @@ -10,8 +13,9 @@ ) from dlt.common.storages import FileStorage from dlt.common.json import json, PY_DATETIME_DECODERS -from dlt.destinations.sql_jobs import SqlFollowupJob +from dlt.destinations._adbc_jobs import AdbcParquetCopyJob +from dlt.destinations.sql_jobs import SqlFollowupJob from dlt.destinations.impl.sqlalchemy.db_api_client import SqlalchemyClient from dlt.destinations.impl.sqlalchemy.merge_job import SqlalchemyMergeFollowupJob @@ -74,6 +78,93 @@ def run(self) -> None: _sql_client.execute_sql(table.insert(), chunk) +class SqlalchemyParquetADBCJob(AdbcParquetCopyJob): + """ADBC Parquet copy job for SQLAlchemy (sqlite, mysql) with query param handling.""" + + def __init__(self, file_path: str, table: sa.Table) -> None: + super().__init__(file_path) + self._job_client: "SqlalchemyJobClient" = None + self.table = table + + if TYPE_CHECKING: + from adbc_driver_manager.dbapi import Connection + + def _connect(self) -> Connection: + from adbc_driver_manager import dbapi + + engine = self._job_client.config.credentials.engine + dialect = engine.dialect.name.lower() + url = engine.url + + query = dict(url.query or {}) + + if dialect == "sqlite": + # disable schema and catalog when ingest + self._connect_schema_name = "" + self._connect_catalog_name = "" + + # attach directly to dataset sqlite file as "main" + if self._job_client.sql_client.dataset_name == "main": + db_path = url.database + else: + db_path = self._job_client.sql_client._sqlite_dataset_filename( + self._job_client.sql_client.dataset_name + ) + conn_str = f"file:{db_path}" + + if query: + qs = "&".join(f"{k}={v}" for k, v in query.items()) + conn_str = f"{conn_str}?{qs}" + + logger.info(f"ADBC connect to {conn_str}") + conn = dbapi.connect(driver="sqlite", db_kwargs={"uri": conn_str}) + # WAL mode already set, add busy timeout to handle multiple writers + conn.execute("PRAGMA busy_timeout=1000;") + return conn + + elif dialect == "mysql": + # disable schema and catalog when ingest + self._connect_schema_name = "" + self._connect_catalog_name = "" + + # mysql: convert SSL params into go-mysql ADBC parameters + mapped = {} + for k, v in query.items(): + lk = k.lower() + if lk == "ssl_ca": + mapped["tls-ca"] = v + elif lk == "ssl_cert": 
+ mapped["tls-cert"] = v + elif lk == "ssl_key": + mapped["tls-key"] = v + elif lk == "ssl_mode": + mapped["tls"] = v + else: + mapped[k] = v + + username = url.username or "" + password = url.password or "" + auth = f"{username}:{password}@" if username or password else "" + + host = url.host or "localhost" + port = url.port or 3306 + # dataset name is schema name is database name. each database is a schema in mysql + database = self._job_client.sql_client.dataset_name # url.database or "" + + base = f"{auth}tcp({host}:{port})/{database}" + if mapped: + qs = "&".join(f"{k}={v}" for k, v in mapped.items()) + conn_str = f"{base}?{qs}" + else: + conn_str = base + + logger.info(f"ADBC connect to {conn_str}") + return dbapi.connect(driver="mysql", db_kwargs={"uri": conn_str}) + + else: + raise NotImplementedError(f"ADBC not supported for sqlalchemy dialect {dialect}") + + class SqlalchemyParquetInsertJob(SqlalchemyJsonLInsertJob): def _iter_data_item_chunks(self) -> Iterator[Sequence[Dict[str, Any]]]: from dlt.common.libs.pyarrow import ParquetFile diff --git a/dlt/destinations/impl/sqlalchemy/merge_job.py b/dlt/destinations/impl/sqlalchemy/merge_job.py index e6b2584c0d..1b2aa86259 100644 --- a/dlt/destinations/impl/sqlalchemy/merge_job.py +++ b/dlt/destinations/impl/sqlalchemy/merge_job.py @@ -1,7 +1,8 @@ -from typing import Sequence, Tuple, Optional, List, Union +from typing import Sequence, Tuple, Optional, List, Union, cast import operator import sqlalchemy as sa +from dlt.common.typing import TAnyDateTime from dlt.common.utils import uniq_id from dlt.common.destination import PreparedTableSchema, DestinationCapabilitiesContext from dlt.common.schema.utils import ( @@ -374,10 +375,13 @@ def gen_scd2_sql( format_datetime_literal = ( DestinationCapabilitiesContext.generic_capabilities().format_datetime_literal ) - - boundary_ts = ensure_pendulum_datetime_utc( - root_table.get("x-boundary-timestamp", current_load_package()["state"]["created_at"]) # type: ignore[arg-type] + _boundary_ts = cast(Optional[TAnyDateTime], root_table.get("x-boundary-timestamp")) + boundary_ts: TAnyDateTime = ( + _boundary_ts + if _boundary_ts is not None + else current_load_package()["state"]["created_at"] ) + boundary_ts = ensure_pendulum_datetime_utc(boundary_ts) boundary_literal = format_datetime_literal(boundary_ts, caps.timestamp_precision) diff --git a/dlt/destinations/impl/sqlalchemy/sqlalchemy_job_client.py b/dlt/destinations/impl/sqlalchemy/sqlalchemy_job_client.py index 97bb1b1e43..7d66d6da74 100644 --- a/dlt/destinations/impl/sqlalchemy/sqlalchemy_job_client.py +++ b/dlt/destinations/impl/sqlalchemy/sqlalchemy_job_client.py @@ -14,7 +14,6 @@ PreparedTableSchema, FollowupJobRequest, ) -from dlt.destinations.job_client_impl import SqlJobClientWithStagingDataset, SqlLoadJob from dlt.common.destination.capabilities import DestinationCapabilitiesContext from dlt.common.schema import Schema, TTableSchema, TColumnSchema, TSchemaTables from dlt.common.schema.typing import ( @@ -30,11 +29,15 @@ get_columns_names_with_prop, ) from dlt.common.storages.load_storage import ParsedLoadJobFileName + +from dlt.destinations.job_client_impl import SqlJobClientWithStagingDataset +from dlt.destinations._adbc_jobs import has_adbc_driver as adbc_has_driver from dlt.destinations.exceptions import DatabaseUndefinedRelation from dlt.destinations.impl.sqlalchemy.db_api_client import SqlalchemyClient from dlt.destinations.impl.sqlalchemy.configuration import SqlalchemyClientConfiguration from 
dlt.destinations.impl.sqlalchemy.load_jobs import ( SqlalchemyJsonLInsertJob, + SqlalchemyParquetADBCJob, SqlalchemyParquetInsertJob, SqlalchemyReplaceJob, SqlalchemyMergeFollowupJob, @@ -138,7 +141,11 @@ def create_load_job( return SqlalchemyJsonLInsertJob(file_path, table_obj) elif parsed_file.file_format == "parquet": table_obj = self._to_table_object(table) - return SqlalchemyParquetInsertJob(file_path, table_obj) + # if driver for a given dialect is installed + if adbc_has_driver(self.config.credentials.engine.dialect.name)[0]: + return SqlalchemyParquetADBCJob(file_path, table_obj) + else: + return SqlalchemyParquetInsertJob(file_path, table_obj) return None def complete_load(self, load_id: str) -> None: diff --git a/dlt/destinations/impl/synapse/configuration.py b/dlt/destinations/impl/synapse/configuration.py index eb5610060b..e72c6ecdd7 100644 --- a/dlt/destinations/impl/synapse/configuration.py +++ b/dlt/destinations/impl/synapse/configuration.py @@ -19,8 +19,8 @@ class SynapseCredentials(MsSqlCredentials): # LongAsMax keyword got introduced in ODBC Driver 18 for SQL Server. SUPPORTED_DRIVERS: ClassVar[List[str]] = ["ODBC Driver 18 for SQL Server"] - def _get_odbc_dsn_dict(self) -> Dict[str, Any]: - params = super()._get_odbc_dsn_dict() + def get_odbc_dsn_dict(self) -> Dict[str, Any]: + params = super().get_odbc_dsn_dict() # Long types (text, ntext, image) are not supported on Synapse. # Convert to max types using LongAsMax keyword. # https://stackoverflow.com/a/57926224 diff --git a/dlt/destinations/sql_jobs.py b/dlt/destinations/sql_jobs.py index 9839883fe7..95bdaf58af 100644 --- a/dlt/destinations/sql_jobs.py +++ b/dlt/destinations/sql_jobs.py @@ -4,7 +4,7 @@ from dlt.common.time import ensure_pendulum_datetime_utc from dlt.common.destination import PreparedTableSchema from dlt.common.destination.utils import resolve_merge_strategy -from dlt.common.typing import TypedDict +from dlt.common.typing import TAnyDateTime, TypedDict from dlt.common.schema.typing import ( TSortOrder, @@ -845,12 +845,11 @@ def gen_scd2_sql( DestinationCapabilitiesContext.generic_capabilities().format_datetime_literal ) - boundary_ts = ensure_pendulum_datetime_utc( - root_table.get( # type: ignore[arg-type] - "x-boundary-timestamp", - current_load_package()["state"]["created_at"], - ) - ) + created_at = current_load_package()["state"]["created_at"] + _boundary_ts = cast(Optional[TAnyDateTime], root_table.get("x-boundary-timestamp")) + boundary_ts: TAnyDateTime = _boundary_ts if _boundary_ts is not None else created_at + boundary_ts = ensure_pendulum_datetime_utc(boundary_ts) + boundary_literal = format_datetime_literal( boundary_ts, caps.timestamp_precision, diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index ac771bcf07..623a26898d 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -830,6 +830,8 @@ def validate_write_disposition_hint(template: TResourceHints) -> None: ): continue # None is allowed for active_record_timestamp if ts in wd: + if wd[ts] is None: # type: ignore[literal-required] + continue try: ensure_pendulum_datetime_utc(wd[ts]) # type: ignore[literal-required] except Exception: diff --git a/dlt/extract/incremental/__init__.py b/dlt/extract/incremental/__init__.py index bc882f9d7a..7b3608031b 100644 --- a/dlt/extract/incremental/__init__.py +++ b/dlt/extract/incremental/__init__.py @@ -31,6 +31,7 @@ is_subclass, TColumnNames, TypedDict, + resolve_single_annotation, ) from dlt.common.configuration import configspec, ConfigurationValueError from 
dlt.common.configuration.specs import BaseConfiguration @@ -692,7 +693,9 @@ def should_wrap(sig: inspect.Signature) -> bool: def get_incremental_arg(sig: inspect.Signature) -> Optional[inspect.Parameter]: incremental_param: Optional[inspect.Parameter] = None for p in sig.parameters.values(): - annotation = extract_inner_type(p.annotation) + annotation = extract_inner_type( + resolve_single_annotation(p.annotation, globalns=globals()) + ) if is_subclass(annotation, Incremental) or isinstance(p.default, Incremental): incremental_param = p break diff --git a/dlt/extract/pipe.py b/dlt/extract/pipe.py index d009fd982e..3c4f908d52 100644 --- a/dlt/extract/pipe.py +++ b/dlt/extract/pipe.py @@ -16,7 +16,7 @@ ) from dlt.common.reflection.inspect import isasyncgenfunction, isgeneratorfunction -from dlt.common.typing import AnyFun, AnyType, TDataItems +from dlt.common.typing import AnyFun, AnyType, TDataItems, resolve_single_annotation from dlt.common.utils import get_callable_name, uniq_id from dlt.extract.exceptions import ( @@ -348,10 +348,13 @@ def _should_eval_on_bind(self, gen: Any, sig: inspect.Signature) -> bool: # below we import DltResource but Pipe class should not be dependent on it from dlt.extract.resource import DltResource - if sig.return_annotation != inspect.Signature.empty and inspect.isclass( - sig.return_annotation - ): - return issubclass(sig.return_annotation, DltResource) + if sig.return_annotation != inspect.Signature.empty: + # globals will contain DltResource which we want to resolve + return_annotation = resolve_single_annotation( + sig.return_annotation, globalns=globals() + ) + if inspect.isclass(return_annotation): + return issubclass(return_annotation, DltResource) return False diff --git a/dlt/normalize/items_normalizers.py b/dlt/normalize/items_normalizers.py index b0f71be48a..8acced9ac0 100644 --- a/dlt/normalize/items_normalizers.py +++ b/dlt/normalize/items_normalizers.py @@ -17,7 +17,6 @@ from dlt.common.normalizers.json.relational import DataItemNormalizer as RelationalNormalizer from dlt.common.normalizers.json.helpers import get_root_row_id_type from dlt.common.runtime import signals -from dlt.common.schema import utils from dlt.common.schema.typing import ( C_DLT_ID, C_DLT_LOAD_ID, @@ -32,7 +31,10 @@ dlt_load_id_column, has_table_seen_data, normalize_table_identifiers, + is_nested_table, + has_seen_null_first_hint, ) +from dlt.common.schema import utils from dlt.common.schema.exceptions import CannotCoerceColumnException, CannotCoerceNullException from dlt.common.time import normalize_timezone from dlt.common.utils import read_dialect_and_sql @@ -469,6 +471,7 @@ def _normalize_chunk( else parent_table or table_name ), # parent_table, if present, exists in the schema ) + partial_table, filters = schema.apply_schema_contract( schema_contract, partial_table, data_item=row ) @@ -520,8 +523,45 @@ def _normalize_chunk( pass # kill job if signalled signals.raise_if_signalled() + + self._clean_seen_null_first_hint(schema_update) + return schema_update + def _clean_seen_null_first_hint(self, schema_update: TSchemaUpdate) -> None: + """ + Performs schema and schema update cleanup related to `seen-null-first` hints by + removing entire columns with `seen-null-first` hints from parent tables + when those columns have been converted to nested tables. + + NOTE: The `seen-null-first` hint is used during schema inference to track columns + that were first encountered with null values. 
In cases where subsequent + non-null values create a nested table, the entire + column with the `seen-null-first` hint in parent table becomes obsolete. + + Args: + schema_update (TSchemaUpdate): Dictionary mapping table names to their table updates. + """ + schema_update_copy = schema_update.copy() + for table_name, table_updates in schema_update_copy.items(): + last_ident_path = self._full_ident_path_tracker.get(table_name)[-1] + + for table_update in table_updates: + # Remove the entire column with hint from parent table if it was created as a nested table + if is_nested_table(table_update): + parent_name = table_update.get("parent") + parent_col_schemas = self.schema.get_table_columns( + parent_name, include_incomplete=True + ) + parent_col_schema = parent_col_schemas.get(last_ident_path) + + if parent_col_schema and has_seen_null_first_hint(parent_col_schema): + parent_col_schemas.pop(last_ident_path) + parent_updates = schema_update.get(parent_name, []) + for j, parent_update in enumerate(parent_updates): + if last_ident_path in parent_update["columns"]: + schema_update[parent_name][j]["columns"].pop(last_ident_path) + def _coerce_row( self, table_name: str, parent_table: str, row: StrAny ) -> Tuple[DictStrAny, TPartialTableSchema]: diff --git a/dlt/normalize/normalize.py b/dlt/normalize/normalize.py index f12b711722..c3e28f3f27 100644 --- a/dlt/normalize/normalize.py +++ b/dlt/normalize/normalize.py @@ -13,7 +13,11 @@ from dlt.common.runtime import signals from dlt.common.runtime.collector import Collector, NULL_COLLECTOR from dlt.common.schema.typing import TStoredSchema, TTableSchema -from dlt.common.schema.utils import merge_schema_updates +from dlt.common.schema.utils import ( + merge_schema_updates, + has_seen_null_first_hint, + remove_seen_null_first_hint, +) from dlt.common.storages import ( NormalizeStorage, SchemaStorage, @@ -159,7 +163,9 @@ def map_single(self, schema: Schema, load_id: str, files: Sequence[str]) -> TWor ) return result - def clean_x_normalizer(self, load_id: str, table_name: str, table_schema: TTableSchema) -> None: + def clean_x_normalizer( + self, load_id: str, table_name: str, table_schema: TTableSchema, path_separator: str + ) -> None: x_normalizer = table_schema.setdefault("x-normalizer", {}) # drop evolve once for all tables that seen data x_normalizer.pop("evolve-columns-once", None) @@ -170,14 +176,22 @@ def clean_x_normalizer(self, load_id: str, table_name: str, table_schema: TTable ) x_normalizer["seen-data"] = True - # Handle column-level x-normalizer - # drop seen-null-first flag if data type was set - for column in table_schema.get("columns", {}).values(): - col_x_normalizer = column.setdefault("x-normalizer", {}) - if col_x_normalizer.get("seen-null-first") and "data_type" in column: - col_x_normalizer.pop("seen-null-first", None) - if not col_x_normalizer: - column.pop("x-normalizer", None) + # Handle column-level seen-null-first hint in x-normalizer hints + col_schemas = table_schema.get("columns", {}) + for col_name, col_schema in list(col_schemas.items()): + if has_seen_null_first_hint(col_schema): + if "data_type" in col_schema: + # 1. Remove seen-null-first hint if data type is set + remove_seen_null_first_hint(col_schema) + else: + # 2. 
Remove entire column if it was created as compound column(s) + # TODO: use column ident paths (also in JsonLItemsNormalizer._coerce_null_value), + # path separator is not reliable with shortened names + if any( + col.startswith(col_name + path_separator) + for col in list(col_schemas.keys()) + ): + table_schema["columns"].pop(col_name) def spool_files( self, load_id: str, schema: Schema, map_f: TMapFuncType, files: Sequence[str] @@ -196,7 +210,7 @@ def spool_files( for table_name in table_metrics: table = schema.tables[table_name] verify_normalized_table(schema, table, self.config.destination_capabilities) - self.clean_x_normalizer(load_id, table_name, table) + self.clean_x_normalizer(load_id, table_name, table, schema.naming.PATH_SEPARATOR) # schema is updated, save it to schema volume if schema.is_modified: logger.info( diff --git a/docs/uv.lock b/docs/uv.lock index b00fe107d4..7e5d0f1360 100644 --- a/docs/uv.lock +++ b/docs/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9.2, <3.14" resolution-markers = [ "python_full_version >= '3.13' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", @@ -1089,7 +1089,7 @@ dependencies = [ { name = "humanize", version = "4.13.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "humanize", version = "4.14.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "jsonpath-ng" }, - { name = "orjson", marker = "sys_platform != 'emscripten'" }, + { name = "orjson", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'" }, { name = "packaging" }, { name = "pathvalidate" }, { name = "pendulum" }, @@ -1169,7 +1169,7 @@ requires-dist = [ { name = "cron-descriptor", marker = "extra == 'cli'", specifier = ">=1.2.32" }, { name = "databricks-sdk", marker = "extra == 'databricks'", specifier = ">=0.38.0" }, { name = "databricks-sql-connector", marker = "python_full_version >= '3.13' and extra == 'databricks'", specifier = ">=3.6.0" }, - { name = "databricks-sql-connector", marker = "python_full_version < '3.13' and extra == 'databricks'", specifier = ">=2.9.3,<4" }, + { name = "databricks-sql-connector", marker = "python_full_version < '3.13' and extra == 'databricks'", specifier = ">=2.9.3" }, { name = "db-dtypes", marker = "extra == 'bigquery'", specifier = ">=1.2.0" }, { name = "db-dtypes", marker = "extra == 'gcp'", specifier = ">=1.2.0" }, { name = "deltalake", marker = "extra == 'deltalake'", specifier = ">=0.25.1" }, @@ -1195,8 +1195,8 @@ requires-dist = [ { name = "marimo", marker = "extra == 'workspace'", specifier = ">=0.14.5" }, { name = "mcp", marker = "python_full_version >= '3.10' and extra == 'workspace'", specifier = ">=1.2.1" }, { name = "orjson", marker = "python_full_version >= '3.14'", specifier = ">=3.11.0" }, - { name = "orjson", marker = "sys_platform != 'emscripten'", specifier = ">=3.10.1" }, { name = "orjson", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", specifier = ">=3.6.7,!=3.9.11,!=3.9.12,!=3.9.13,!=3.9.14,!=3.10.1,<4" }, + { name = "orjson", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", specifier = ">=3.10.1" }, { name = "packaging", specifier = ">=21.1" }, { name = "paramiko", marker = "extra == 'sftp'", specifier = ">=3.3.0" }, { name = "pathspec", marker = "extra == 'workspace'", specifier = ">=0.11.2" }, @@ -1256,7 +1256,11 @@ requires-dist = [ 
provides-extras = ["gcp", "bigquery", "postgres", "redshift", "parquet", "duckdb", "ducklake", "filesystem", "s3", "gs", "az", "sftp", "http", "snowflake", "motherduck", "cli", "athena", "weaviate", "mssql", "synapse", "qdrant", "databricks", "clickhouse", "dremio", "lancedb", "deltalake", "sql-database", "sqlalchemy", "pyiceberg", "postgis", "workspace", "dbml"] [package.metadata.requires-dev] -adbc = [{ name = "adbc-driver-postgresql", specifier = ">=1.6.0" }] +adbc = [ + { name = "adbc-driver-manager", specifier = ">=1.8.0" }, + { name = "adbc-driver-postgresql", specifier = ">=1.8.0" }, + { name = "dbc", specifier = ">=0.1.0" }, +] airflow = [{ name = "apache-airflow", marker = "python_full_version < '3.12'", specifier = ">=2.8.0,<3" }] dashboard-tests = [ { name = "playwright", specifier = ">=1.52.0,<2" }, diff --git a/docs/website/docs/dlt-ecosystem/destinations/mssql.md b/docs/website/docs/dlt-ecosystem/destinations/mssql.md index a51cfc603f..f620633a58 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/mssql.md +++ b/docs/website/docs/dlt-ecosystem/destinations/mssql.md @@ -112,6 +112,48 @@ If you set the [`replace` strategy](../../general-usage/full-loading.md) to `sta recreated with an `ALTER SCHEMA ... TRANSFER`. The operation is atomic: MSSQL supports DDL transactions. ## Data loading + +:::tip +We recommend using ADBC + parquet to load data. We observed a 10x - 100x increase in loading speed compared to the INSERT method. The **parquet** file format +will activate automatically if the right driver is present in the system. +::: + +### Fast loading with parquet + +The [parquet](../file-formats/parquet.md) file format is supported via an [ADBC driver](https://arrow.apache.org/adbc/). The **mssql** driver is provided by +[Columnar](https://columnar.tech/). To install it you'll need `dbc`, a tool to manage ADBC drivers: +```sh +pip install adbc-driver-manager dbc +dbc install mssql +``` + +With `uv` you can run `dbc` directly: +```sh +uv tool run dbc search +``` +`dlt` will make **parquet** the preferred file format once the driver is detected at runtime. This method is 10x-70x faster than INSERT and +we make it the default for all input data types. + +Not all arrow data types are supported by the driver; see the driver docs for details: +* fixed length binary +* time with precision different than microseconds + +We copy parquet files with batches of size of 1 row group. All groups are copied in a single transaction. + +:::caution +The ADBC driver appears to be based on [go-mssqldb](https://github.com/denisenkom/go-mssqldb?tab=readme-ov-file). + +The DSN format is different; we translate a few overlapping keys. `pyodbc` and `adbc` ignore unknown keys, so you can specify keys for both in the same string. +::: + +You can go back to `insert_values` by passing `loader_file_format` to a resource or pipeline: +```py +# revert to INSERT statements +pipeline.run(data_iter, dataset_name="speed_test_2", write_disposition="replace", table_name="unsw_flow", loader_file_format="insert_values") +``` + +### Loading with INSERT statements + Data is loaded via INSERT statements by default. MSSQL has a limit of 1000 rows per INSERT, and this is what we use. We send multiple sql statements in a single batch. In case you observe odbc driver locking (ie. when connection with open transaction leaks into the pool) you can: @@ -121,7 +163,7 @@ import pyodbc pyodbc.pooling = False ``` -2. disable batchning of multiple statements in `dlt` +2.
disable batching of multiple statements in `dlt` ```py dlt.destinations.mssql("mssql://loader:@loader.database.windows.net/dlt_data?connect_timeout=15", supports_multiple_statements=False) ``` @@ -129,6 +171,7 @@ ## Supported file formats * [insert-values](../file-formats/insert-format.md) is used by default +* [parquet](../file-formats/parquet.md) is used if the mssql ADBC driver is installed ## Supported column hints **mssql** will create unique indexes for all columns with `unique` hints. This behavior **is disabled by default**. diff --git a/docs/website/docs/dlt-ecosystem/destinations/postgres.md b/docs/website/docs/dlt-ecosystem/destinations/postgres.md index 85fdcf7171..21cc09c3de 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/postgres.md +++ b/docs/website/docs/dlt-ecosystem/destinations/postgres.md @@ -116,7 +116,13 @@ In the example above, `arrow_table` will be converted to CSV with **pyarrow** an ### Fast loading with Arrow tables and parquet -[parquet](../file-formats/parquet.md) file format is supported via [ADBC driver](https://arrow.apache.org/adbc/current/driver/postgresql.html). Install the right driver to enable it: +[parquet](../file-formats/parquet.md) file format is supported via [ADBC driver](https://arrow.apache.org/adbc/current/driver/postgresql.html). +To install it you'll need `dbc`, a tool to manage ADBC drivers: +```sh +pip install adbc-driver-manager dbc +dbc install postgresql +``` +Installation via the Python package is supported as well: ```sh pip install adbc-driver-postgresql ``` @@ -128,7 +134,7 @@ Not all `postgres` types are supported, see driver docs for more details: * We observed problems with some decimal precision/scale ie. `decimal128(6, 2)` is not properly decoded. * large decimals are not supported. `postgres` is the only destination that fully supports `wei` (256 bit) decimal precision, this does not work with ADBC. -We copy parquet files with batches of size of 1 row group. One files is copied in a single transaction. +We copy parquet files with batches of size of 1 row group. Each file is copied in a single transaction. ## Supported file formats * [insert-values](../file-formats/insert-format.md) is used by default. diff --git a/docs/website/docs/dlt-ecosystem/destinations/sqlalchemy.md b/docs/website/docs/dlt-ecosystem/destinations/sqlalchemy.md index 0780d4b4a5..757833ad9f 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/sqlalchemy.md +++ b/docs/website/docs/dlt-ecosystem/destinations/sqlalchemy.md @@ -231,6 +231,36 @@ The following write dispositions are supported: ## Data loading +### Fast loading with parquet + +The [parquet](../file-formats/parquet.md) file format is supported via [ADBC drivers](https://arrow.apache.org/adbc/) for **mysql** and **sqlite**. +The MySQL driver is provided by [Columnar](https://columnar.tech/). To install it you'll need `dbc`, a tool to manage ADBC drivers: +```sh +pip install adbc-driver-manager dbc +dbc install mysql +``` + +With `uv` you can run `dbc` directly: +```sh +uv tool run dbc install sqlite +``` +Note that **we do not detect the sqlite** driver [installed via Python package](https://arrow.apache.org/adbc/current/driver/sqlite.html). + +You must have the correct driver installed and `loader_file_format` set to `parquet` in order to use ADBC. If the driver is not found, +`dlt` will convert parquet into INSERT statements.
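+
+For example, assuming the mysql ADBC driver is installed and a resource called `my_data` (an illustrative name) that yields arrow tables, you can request parquet explicitly:
+```py
+import dlt
+
+# credentials for the sqlalchemy destination are resolved from config/secrets as usual
+pipeline = dlt.pipeline("adbc_example", destination="sqlalchemy", dataset_name="adbc_data")
+
+# with the driver present this runs the ADBC copy job;
+# without it, dlt converts the parquet files into INSERT statements
+pipeline.run(my_data, loader_file_format="parquet")
+```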
+ +Not all arrow data types are supported by the **sqlite**: +* decimal types +* time type + +We copy parquet files with batches of size of 1 row group. All groups are copied in a single transaction. + +:::caution +It looks like ADBC driver is based on go mysql. We do minimal conversion of connection strings from SQLAlchemy (ssl cert settings for mysql). +::: + +### Loading with SqlAlchemy batch INSERTs + Data is loaded in a dialect-agnostic manner with an `insert` statement generated by SQLAlchemy's core API. Rows are inserted in batches as long as the underlying database driver supports it. By default, the batch size is 10,000 rows. diff --git a/docs/website/docs/general-usage/merge-loading.md b/docs/website/docs/general-usage/merge-loading.md index ad09f347e7..80e85c4576 100644 --- a/docs/website/docs/general-usage/merge-loading.md +++ b/docs/website/docs/general-usage/merge-loading.md @@ -567,6 +567,35 @@ def dim_customer(): ... ``` +#### Reset boundary timestamp to the current load time +To stop using a previously set `boundary_timestamp` and revert to the default (the current load package creation time), set `boundary_timestamp` to `None`. You can do this either at definition time or dynamically with `apply_hints` before a run. + +Definition-time (always use current load time): +```py +@dlt.resource( + write_disposition={ + "disposition": "merge", + "strategy": "scd2", + "boundary_timestamp": None, # reset to current load time + } +) +def dim_customer(): + ... +``` + +Per-run reset (override just for this run): +```py +r.apply_hints( + write_disposition={ + "disposition": "merge", + "strategy": "scd2", + "boundary_timestamp": None, # reset to current load time for this run + } +) +pipeline.run(r(...)) +``` +When `boundary_timestamp` is `None` (or omitted), `dlt` uses the load package's creation timestamp as the boundary for both retiring existing versions and creating new versions. + ### Example: Use your own row hash By default, `dlt` generates a row hash based on all columns provided by the resource and stores it in `_dlt_id`. You can use your own hash instead by specifying `row_version_column_name` in the `write_disposition` dictionary. You might already have a column present in your resource that can naturally serve as a row hash, in which case it's more efficient to use those pre-existing hash values than to generate new artificial ones. This option also allows you to use hashes based on a subset of columns, in case you want to ignore changes in some of the columns. When using your own hash, values for `_dlt_id` are randomly generated. 
```py diff --git a/mypy.ini b/mypy.ini index 46e519bb5f..102b03389f 100644 --- a/mypy.ini +++ b/mypy.ini @@ -166,6 +166,9 @@ ignore_missing_imports = True [mypy-adbc_driver_postgresql.*] ignore_missing_imports = True +[mypy-adbc_driver_manager.*] +ignore_missing_imports = True + [mypy-playwright.*] ignore_missing_imports = True diff --git a/pyproject.toml b/pyproject.toml index 73899a2544..0a7cbd632d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ dependencies = [ "giturlparse>=0.10.0", "orjson>=3.6.7,<4,!=3.9.11,!=3.9.12,!=3.9.13,!=3.9.14,!=3.10.1 ; platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", # enables pyodide install, mind that 3.10.1 is a buggy version and should be yanked - "orjson>=3.10.1 ; sys_platform != 'emscripten'", + "orjson>=3.10.1 ; platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", "orjson>=3.11.0 ; python_version > '3.13'", "tenacity>=8.0.2", "jsonpath-ng>=1.5.3", @@ -298,7 +298,8 @@ dbt = [ "dbt-sqlserver>=1.5.0 ; python_version < '3.13'", ] adbc = [ - "adbc-driver-postgresql>=1.6.0" + "dbc>=0.1.0", + "adbc-driver-manager>=1.8.0", ] [project.entry-points.dlt] diff --git a/tests/.dlt/config.toml b/tests/.dlt/config.toml index 9036803472..6f9ea9d9bd 100644 --- a/tests/.dlt/config.toml +++ b/tests/.dlt/config.toml @@ -1,3 +1,6 @@ +# disable adbc by default, it will be enabled only for adbc tests +disable_adbc_detection=true + [runtime] # sentry_dsn="https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" dlthub_telemetry=false diff --git a/tests/common/_annotated_futures_module.py b/tests/common/_annotated_futures_module.py new file mode 100644 index 0000000000..553b445a3f --- /dev/null +++ b/tests/common/_annotated_futures_module.py @@ -0,0 +1,30 @@ +"""Defines a few objects to test forward annotations""" + +from __future__ import annotations +from typing import TypedDict +from typing_extensions import ForwardRef + +from dlt.common.configuration import configspec +from dlt.common.configuration.specs import BaseConfiguration + + +class _Sentinel(str): + pass + + +class AnnTypedDict(TypedDict): + word: str + sentinel: _Sentinel # automatically converted to ForwardRef + + +@configspec +class AnnConfigSpec(BaseConfiguration): + word: str = None + sentinel: _Sentinel = None + sentinel_f: ForwardRef("_Sentinel") = None # type: ignore + + +def ann_func( + sentinel: _Sentinel, sentinel_f: ForwardRef("_Sentinel"), word: str = "word" # type: ignore +) -> _Sentinel: + pass diff --git a/tests/common/configuration/test_annotation_future.py b/tests/common/configuration/test_annotation_future.py index 800d689fb7..715e90e348 100644 --- a/tests/common/configuration/test_annotation_future.py +++ b/tests/common/configuration/test_annotation_future.py @@ -1,16 +1,21 @@ from __future__ import annotations +import pytest from typing import Optional +import dlt from dlt.common.configuration import configspec +from dlt.common.configuration.inject import get_fun_spec, with_config from dlt.common.configuration.resolve import resolve_configuration from dlt.common.configuration.specs import BaseConfiguration +from dlt.common.typing import ForwardRef from tests.utils import preserve_environ from tests.common.configuration.utils import environment +from tests.common._annotated_futures_module import AnnConfigSpec -def test_str_annotations(environment) -> None: +def test_configspec_annotation(environment) -> None: @configspec class DataConf(BaseConfiguration): x: int = None @@ -23,3 +28,77 @@ class 
DataConf(BaseConfiguration): environment["X"] = "10" c = resolve_configuration(DataConf()) assert c.x == 10 + + +def test_global_configspec_global_annotation(environment) -> None: + fields = AnnConfigSpec.get_resolvable_fields() + assert fields["word"] is str + # NOTE: do not import _Sentinel - this will add it to globals of this module and make this + # tests irrelevant + assert fields["sentinel"].__name__ == "_Sentinel" + assert fields["sentinel_f"].__name__ == "_Sentinel" + + # resolve + environment["WORD"] = "W" + environment["SENTINEL"] = "S" + environment["SENTINEL_F"] = "SF" + c = resolve_configuration(AnnConfigSpec()) + assert c.word == "W" + assert c.sentinel == "S" + assert c.sentinel_f == "SF" + + +def test_configspec_inner_annotation(environment) -> None: + # make it resolvable by deriving from str + class _Sentinel(str): + pass + + # will not resolve inner annotation, we must be passing locals which is beyond the scope + # of configspec (could be done by inspecting caller frame) + + with pytest.raises(NameError): + + @configspec + class InnerAnnotation(BaseConfiguration): + sentinel: _Sentinel = None + word: str = None + + +class _SentinelG(str): + pass + + +@with_config +def ann_func( + sentinel: _SentinelG, + sentinel_f: ForwardRef("_SentinelG") = dlt.config.value, # type: ignore + word: str = "word", +): + return sentinel, sentinel_f, word + + +def test_with_config_global_annotation(environment) -> None: + spec = get_fun_spec(ann_func) + assert spec.get_resolvable_fields() == {"sentinel_f": _SentinelG, "word": str} + + environment["WORD"] = "W" + environment["SENTINEL_F"] = "SF" + + assert ann_func(_SentinelG("F")) == ("F", "SF", "W") + + +def test_with_config_inner_annotation(environment) -> None: + class _SentinelI(str): + pass + + @with_config + def ann_func( + sentinel: _SentinelI, + sentinel_f: ForwardRef("_SentinelI") = dlt.config.value, # type: ignore + word: str = "word", + ): + return sentinel, sentinel_f, word + + spec = get_fun_spec(ann_func) + # only "word" will be included in SPEC, sentinel_f could not be resolved and is invalid hint + assert spec.get_resolvable_fields() == {"word": str} diff --git a/tests/common/configuration/test_configuration.py b/tests/common/configuration/test_configuration.py index 4a2ac4716f..d1748d58a5 100644 --- a/tests/common/configuration/test_configuration.py +++ b/tests/common/configuration/test_configuration.py @@ -1192,6 +1192,7 @@ def test_coercion_rules() -> None: def test_is_valid_hint() -> None: + assert is_valid_hint(bool) is True assert is_valid_hint(Any) is True # type: ignore[arg-type] assert is_valid_hint(Optional[Any]) is True # type: ignore[arg-type] assert is_valid_hint(RuntimeConfiguration) is True diff --git a/tests/common/destination/test_reference.py b/tests/common/destination/test_reference.py index 44d9961b9e..0eb45015d4 100644 --- a/tests/common/destination/test_reference.py +++ b/tests/common/destination/test_reference.py @@ -1,5 +1,5 @@ from collections.abc import MutableMapping -from operator import eq +import pickle from typing import Dict import pytest @@ -14,7 +14,7 @@ from dlt.common.normalizers.naming import sql_ci_v1, sql_cs_v1 from tests.common.configuration.utils import environment -from tests.utils import ACTIVE_DESTINATIONS +from tests.utils import IMPLEMENTED_DESTINATIONS def test_import_unknown_destination() -> None: @@ -166,7 +166,7 @@ def test_import_module_by_path() -> None: def test_import_all_destinations() -> None: # this must pass without the client dependencies being imported - for 
dest_type in ACTIVE_DESTINATIONS: + for dest_type in IMPLEMENTED_DESTINATIONS: dest = DestinationReference.from_reference( dest_type, None, dest_type + "_name", "production" ) @@ -175,7 +175,12 @@ def test_import_all_destinations() -> None: assert dest.config_params["environment"] == "production" assert dest.config_params["destination_name"] == dest_type + "_name" dest.spec() - assert isinstance(dest.capabilities(), DestinationCapabilitiesContext) + caps = dest.capabilities() + assert isinstance(caps, DestinationCapabilitiesContext) + # make sure caps are pickable + pickled_caps = pickle.dumps(caps) + unpickled_caps = pickle.loads(pickled_caps) + assert caps.supported_loader_file_formats == unpickled_caps.supported_loader_file_formats # every destination is in the registry assert dest.destination_type in DestinationReference.DESTINATIONS assert DestinationReference.find(dest_type) is DestinationReference.find( diff --git a/tests/common/test_typing.py b/tests/common/test_typing.py index 2821514d31..7ee82ac96c 100644 --- a/tests/common/test_typing.py +++ b/tests/common/test_typing.py @@ -1,6 +1,4 @@ from types import SimpleNamespace - -import pytest from dataclasses import dataclass from typing import ( Any, @@ -35,6 +33,7 @@ extract_inner_type, extract_union_types, get_all_types_of_class_in_union, + get_type_globals, is_dict_generic_type, is_list_generic_type, is_literal_type, @@ -333,3 +332,27 @@ class Foo(Generic[T]): assert get_generic_type_argument_from_instance(instance, 1) is int instance = SimpleNamespace(__orig_class__=Optional[Foo[Any]]) assert get_generic_type_argument_from_instance(instance, 1) is int + + +def test_get_type_globals() -> None: + import sys + from ._annotated_futures_module import ann_func, AnnTypedDict, AnnConfigSpec + + assert "_Sentinel" in get_type_globals(ann_func) + assert "_Sentinel" in get_type_globals(AnnTypedDict) + assert "_Sentinel" in get_type_globals(AnnConfigSpec) + assert "_Sentinel" in get_type_globals(AnnConfigSpec()) + + assert get_type_globals(None) == __builtins__ + + f = lambda x: x + assert get_type_globals(f) == f.__globals__ + + Dynamic = type("Dynamic", (), {}) + assert get_type_globals(Dynamic) == sys.modules[Dynamic.__module__].__dict__ + + class NoModule: + pass + + NoModule.__module__ = None + assert get_type_globals(NoModule()) == {} diff --git a/tests/common/test_typing_forward_annotations.py b/tests/common/test_typing_forward_annotations.py new file mode 100644 index 0000000000..54b1473628 --- /dev/null +++ b/tests/common/test_typing_forward_annotations.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import inspect +import pytest +from typing import TypedDict + +try: + # backport from inspect + from typing_extensions import get_annotations # type: ignore +except ImportError: + from inspect import get_annotations # type: ignore + +from dlt.common.typing import AnyFun, get_type_hints, ForwardRef, resolve_single_annotation + +from ._annotated_futures_module import AnnTypedDict, ann_func + + +def _produce_typed_dict_inner_annotations(): + class _Sentinel: + pass + + class InnerAnnotation(TypedDict): + sentinel: _Sentinel + word: str + + # annotates with str but TypedDict converts into ForwardRef + return InnerAnnotation + + +def _produce_func_inner_annotations() -> AnyFun: + class _Sentinel: + pass + + def _func(s: _Sentinel, arg1: bool = False) -> _Sentinel: + pass + + return _func + + +def test_inner_forward_annotations(): + # tests various assumptions in forward annotations + + # typeddict converts forward str ann to 
ForwardRef 🤯 + # NOTE: re. the above: eval_str does nothing + td_ann = get_annotations(_produce_typed_dict_inner_annotations(), eval_str=False) + assert isinstance(td_ann["word"], ForwardRef) + assert isinstance(td_ann["sentinel"], ForwardRef) + # resolve forward ref + assert resolve_single_annotation(td_ann["word"]) is str + # inner forward ref cannot be resolved + with pytest.raises(NameError): + assert resolve_single_annotation(td_ann["sentinel"], raise_on_error=True) is str + # forward ref could not be resolved + assert isinstance(resolve_single_annotation(td_ann["sentinel"]), ForwardRef) + # get_type_hints cannot resolve + with pytest.raises(NameError): + get_type_hints(_produce_typed_dict_inner_annotations()) + + # function definition + with pytest.raises(NameError): + fun_ann = get_annotations(_produce_func_inner_annotations(), eval_str=True) + fun_ann = get_annotations( + _produce_func_inner_annotations(), eval_str=False + ) # _produce_func_inner_annotations().__annotations__ # + assert isinstance(fun_ann["arg1"], str) + assert isinstance(fun_ann["s"], str) + assert resolve_single_annotation(fun_ann["arg1"]) is bool + # cannot resolve, keeps string + assert isinstance(resolve_single_annotation(fun_ann["s"]), str) + # get_type_hints cannot resolve + with pytest.raises(NameError): + get_type_hints(_produce_func_inner_annotations()) + # inspect signature + sig = inspect.signature(_produce_func_inner_annotations()) + # not resolved + assert isinstance(sig.parameters["s"].annotation, str) + + +def test_module_forward_annotations(): + from . import _annotated_futures_module + + td_ann = get_annotations(AnnTypedDict) + assert isinstance(td_ann["word"], ForwardRef) + assert isinstance(td_ann["sentinel"], ForwardRef) + assert resolve_single_annotation(td_ann["word"]) is str + # will resolve str in global typed dict + # NOTE: do not import sentinel - it becomes visible in this module globals, this makes test meaningless + assert ( + resolve_single_annotation( + td_ann["sentinel"], globalns=_annotated_futures_module.__dict__ + ).__name__ + == "_Sentinel" + ) + # typed hints should resolve + get_type_hints(AnnTypedDict) + + # func definition + fun_ann = get_annotations(ann_func, eval_str=True) + assert fun_ann["sentinel"].__name__ == "_Sentinel" + # kept as forward ref + assert isinstance(fun_ann["sentinel_f"], ForwardRef) + + fun_ann = get_annotations(ann_func, eval_str=False) + assert isinstance(fun_ann["sentinel"], str) + assert isinstance(fun_ann["sentinel_f"], str) + assert resolve_single_annotation(fun_ann["word"]) is str + # must pass module + with pytest.raises(NameError): + resolve_single_annotation(fun_ann["sentinel"], raise_on_error=True) + assert ( + resolve_single_annotation( + fun_ann["sentinel"], globalns=_annotated_futures_module.__dict__ + ).__name__ + == "_Sentinel" + ) + assert ( + resolve_single_annotation( + fun_ann["sentinel_f"], globalns=_annotated_futures_module.__dict__ + ).__name__ + == "_Sentinel" + ) diff --git a/tests/e2e/helpers/dashboard/test_e2e.py b/tests/e2e/helpers/dashboard/test_e2e.py index eff2255604..48f43427f3 100644 --- a/tests/e2e/helpers/dashboard/test_e2e.py +++ b/tests/e2e/helpers/dashboard/test_e2e.py @@ -122,16 +122,23 @@ def test_multi_schema_selection(page: Page, multi_schema_pipeline: Any): schema_selector = page.get_by_test_id("marimo-plugin-dropdown") schema_selector.select_option("fruitshop_customers") + expect(schema_selector).to_have_value("fruitshop_customers") + schema_selector.scroll_into_view_if_needed() + 
expect(page.get_by_text("customers", exact=True).nth(0)).to_be_visible() expect(page.get_by_text("inventory", exact=True)).to_have_count(0) expect(page.get_by_text("purchases", exact=True)).to_have_count(0) schema_selector.select_option("fruitshop_inventory") + expect(schema_selector).to_have_value("fruitshop_inventory") + expect(page.get_by_text("inventory", exact=True).nth(0)).to_be_visible() expect(page.get_by_text("customers", exact=True)).to_have_count(0) expect(page.get_by_text("purchases", exact=True)).to_have_count(0) schema_selector.select_option("fruitshop_purchases") + expect(schema_selector).to_have_value("fruitshop_purchases") + expect(page.get_by_text("purchases", exact=True).nth(0)).to_be_visible() expect(page.get_by_text("inventory", exact=True)).to_have_count(0) expect(page.get_by_text("customers", exact=True)).to_have_count(0) diff --git a/tests/load/pipeline/test_adbc_loading.py b/tests/load/pipeline/test_adbc_loading.py new file mode 100644 index 0000000000..e24c21201f --- /dev/null +++ b/tests/load/pipeline/test_adbc_loading.py @@ -0,0 +1,94 @@ +import os +import pytest + +import dlt + +from tests.pipeline.utils import load_table_counts +from tests.utils import preserve_environ +from tests.cases import table_update_and_row +from tests.load.pipeline.utils import get_load_package_jobs +from tests.load.utils import ( + destinations_configs, + DestinationTestConfiguration, +) + + +# NOTE: you need to install ADBC drivers to run this tests using dbc +# dbc install: postgresql, mysql, mssql, sqlite + + +@pytest.fixture(autouse=True) +def enable_adbc(preserve_environ) -> None: + os.environ["DISABLE_ADBC_DETECTION"] = "0" + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres", "mssql", "sqlalchemy"]), + ids=lambda x: x.name, +) +def test_adbc_detection(destination_config: DestinationTestConfiguration) -> None: + from dlt.destinations._adbc_jobs import has_adbc_driver + + driver = destination_config.destination_name or destination_config.destination_type + if driver == "postgres": + from dlt.destinations.impl.postgres.factory import get_adbc_driver_location + + driver = get_adbc_driver_location() + elif driver == "sqlalchemy_sqlite": + driver = "sqlite" + elif driver == "sqlalchemy_mysql": + driver = "mysql" + + assert has_adbc_driver(driver)[0] is True + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres", "mssql", "sqlalchemy"]), + ids=lambda x: x.name, +) +def test_adbc_parquet_loading(destination_config: DestinationTestConfiguration) -> None: + # if destination_config.destination_name == "sqlalchemy_sqlite": + # pytest.skip("skip generic ADBC test for sqlite because just a few data types are supported") + column_schemas, data_ = table_update_and_row() + + pipeline = destination_config.setup_pipeline("pipeline_adbc", dev_mode=True) + + if destination_config.destination_type in ("postgres", "mssql"): + del column_schemas["col11_precision"] # TIME(3) not supported + if destination_config.destination_type == "postgres": + del column_schemas["col6_precision"] # adbc cannot process decimal(6,2) + else: + del column_schemas["col7_precision"] # adbc cannot process fixed binary + + if destination_config.destination_name == "sqlalchemy_sqlite": + for k, v in column_schemas.items(): + # decimals not supported + if v["data_type"] in ("decimal", "wei", "time"): + data_[k] = str(data_[k]) + column_schemas[k]["data_type"] = "text" + + @dlt.resource( + 
file_format="parquet", columns=column_schemas, write_disposition="merge", primary_key="col1" + ) + def complex_resource(): + # add child table + data_["child"] = [1, 2, 3] + yield data_ + + info = pipeline.run(complex_resource()) + jobs = get_load_package_jobs( + info.load_packages[0], "completed_jobs", "complex_resource", ".parquet" + ) + # there must be a parquet job or adbc is not installed so we fall back to other job type + assert len(jobs) == 1 + # make sure we can read data back. TODO: verify data types + rows = pipeline.dataset().table("complex_resource").fetchall() + assert len(rows) == 1 + rows = pipeline.dataset().table("complex_resource__child").fetchall() + assert len(rows) == 3 + + # load again and make sure we still have 1 record + pipeline.run(complex_resource()) + assert load_table_counts(pipeline) == {"complex_resource": 1, "complex_resource__child": 3} diff --git a/tests/load/pipeline/test_postgres.py b/tests/load/pipeline/test_postgres.py index f8998c48d9..d19b0e3793 100644 --- a/tests/load/pipeline/test_postgres.py +++ b/tests/load/pipeline/test_postgres.py @@ -1,5 +1,4 @@ import os -import copy import hashlib import random from string import ascii_lowercase @@ -12,9 +11,6 @@ from dlt.destinations import filesystem, redshift - -from tests.cases import table_update_and_row -from tests.load.pipeline.utils import get_load_package_jobs from tests.load.utils import ( destinations_configs, DestinationTestConfiguration, @@ -138,33 +134,6 @@ def test_pipeline_explicit_destination_credentials( ) -@pytest.mark.parametrize( - "destination_config", - destinations_configs(default_sql_configs=True, subset=["postgres"]), - ids=lambda x: x.name, -) -def test_postgres_adbc_parquet_loading(destination_config: DestinationTestConfiguration) -> None: - column_schemas, data_types = table_update_and_row() - - pipeline = destination_config.setup_pipeline( - "test_postgres_adbc_parquet_loading", dev_mode=True - ) - - del column_schemas["col6_precision"] # adbc cannot process decimal(6,2) - del column_schemas["col11_precision"] # TIME(3) not supported - - @dlt.resource(file_format="parquet", columns=column_schemas, max_table_nesting=0) - def complex_resource(): - yield data_types - - info = pipeline.run(complex_resource()) - jobs = get_load_package_jobs( - info.load_packages[0], "completed_jobs", "complex_resource", ".parquet" - ) - # there must be a parquet job or adbc is not installed so we fall back to other job type - assert len(jobs) == 1 - - # TODO: uncomment and finalize when we implement encoding for psycopg2 # @pytest.mark.parametrize( # "destination_config", diff --git a/tests/load/pipeline/test_scd2.py b/tests/load/pipeline/test_scd2.py index c189775ca5..dc5084b7e7 100644 --- a/tests/load/pipeline/test_scd2.py +++ b/tests/load/pipeline/test_scd2.py @@ -1,5 +1,6 @@ # timezone is removed from all datetime objects in these tests to simplify comparison +from unittest import mock import pytest from typing import List, Dict, Any, Optional from datetime import date, datetime, timezone # noqa: I251 @@ -633,7 +634,7 @@ def r(): @pytest.mark.parametrize( "destination_config", - destinations_configs(default_sql_configs=True, subset=["duckdb"]), + destinations_configs(default_sql_configs=True, subset=["sqlalchemy", "duckdb"]), ids=lambda x: x.name, ) def test_boundary_timestamp( @@ -645,6 +646,7 @@ def test_boundary_timestamp( ts2 = "2024-08-22" ts3 = date(2024, 8, 20) # earlier than ts1 and ts2 ts4 = "i_am_not_a_timestamp" + ts5 = pendulum.datetime(2025, 8, 21, 12, 15, tz="UTC").timestamp() 
@dlt.resource( table_name="dim_test", @@ -657,75 +659,127 @@ def test_boundary_timestamp( def r(data): yield data + # normalize timestamps once for assertions + ts1_dt = strip_timezone(ts1) + ts2_dt = strip_timezone(ts2) + ts3_dt = strip_timezone(ts3) + ts5_dt = strip_timezone(ts5) + # load 1 — initial load dim_snap = [ l1_1 := {"nk": 1, "foo": "foo"}, l1_2 := {"nk": 2, "foo": "foo"}, ] - info = p.run(r(dim_snap), **destination_config.run_kwargs) - assert_load_info(info) - assert load_table_counts(p, "dim_test")["dim_test"] == 2 - expected = [ - {**{FROM: strip_timezone(ts1), TO: None}, **l1_1}, - {**{FROM: strip_timezone(ts1), TO: None}, **l1_2}, - ] - assert get_table(p, "dim_test", "nk") == expected - - # load 2 — different source records, different boundary timestamp - r.apply_hints( - write_disposition={ - "disposition": "merge", - "strategy": "scd2", - "boundary_timestamp": ts2, - } - ) - dim_snap = [ - l2_1 := {"nk": 1, "foo": "bar"}, # natural key 1 updated - # l1_2, # natural key 2 no longer present - l2_3 := {"nk": 3, "foo": "foo"}, # new natural key - ] - info = p.run(r(dim_snap), **destination_config.run_kwargs) - assert_load_info(info) - assert load_table_counts(p, "dim_test")["dim_test"] == 4 - expected = [ - {**{FROM: strip_timezone(ts1), TO: strip_timezone(ts2)}, **l1_1}, # retired - {**{FROM: strip_timezone(ts1), TO: strip_timezone(ts2)}, **l1_2}, # retired - {**{FROM: strip_timezone(ts2), TO: None}, **l2_1}, # new - {**{FROM: strip_timezone(ts2), TO: None}, **l2_3}, # new - ] - assert_records_as_set(get_table(p, "dim_test"), expected) - - # load 3 — earlier boundary timestamp - # we naively apply any valid timestamp - # may lead to "valid from" > "valid to", as in this test case - r.apply_hints( - write_disposition={ - "disposition": "merge", - "strategy": "scd2", - "boundary_timestamp": ts3, - } - ) - dim_snap = [l2_1] # natural key 3 no longer present - info = p.run(r(dim_snap), **destination_config.run_kwargs) - assert_load_info(info) - assert load_table_counts(p, "dim_test")["dim_test"] == 4 - expected = [ - {**{FROM: strip_timezone(ts1), TO: strip_timezone(ts2)}, **l1_1}, # unchanged - {**{FROM: strip_timezone(ts1), TO: strip_timezone(ts2)}, **l1_2}, # unchanged - {**{FROM: strip_timezone(ts2), TO: None}, **l2_1}, # unchanged - {**{FROM: strip_timezone(ts2), TO: strip_timezone(ts3)}, **l2_3}, # retired - ] - assert_records_as_set(get_table(p, "dim_test"), expected) + current_time: Dict[str, Optional[float]] = {"ts": None} + with mock.patch( + "dlt.common.storages.load_package.precise_time", + side_effect=lambda: current_time["ts"], + ): + # load 1 — initial load + current_time["ts"] = pendulum.datetime(2024, 8, 21, 12, 15, tz="UTC").timestamp() + r.apply_hints( + write_disposition={ + "disposition": "merge", + "strategy": "scd2", + "boundary_timestamp": ts1, + } + ) + info = p.run(r(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 2 + expected = [ + {**{FROM: ts1_dt, TO: None}, **l1_1}, + {**{FROM: ts1_dt, TO: None}, **l1_2}, + ] + assert get_table(p, "dim_test", "nk", ts_columns=[FROM, TO]) == expected + + # load 2 — different source records, different boundary timestamp + current_time["ts"] = pendulum.datetime(2024, 8, 22, tz="UTC").timestamp() + dim_snap = [ + l2_1 := {"nk": 1, "foo": "bar"}, # natural key 1 updated + # l1_2, # natural key 2 no longer present + l2_3 := {"nk": 3, "foo": "foo"}, # new natural key + ] + r.apply_hints( + write_disposition={ + "disposition": "merge", + 
"strategy": "scd2", + "boundary_timestamp": ts2, + } + ) + info = p.run(r(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 4 + expected = [ + {**{FROM: ts1_dt, TO: ts2_dt}, **l1_1}, # retired + {**{FROM: ts1_dt, TO: ts2_dt}, **l1_2}, # retired + {**{FROM: ts2_dt, TO: None}, **l2_1}, # new + {**{FROM: ts2_dt, TO: None}, **l2_3}, # new + ] + assert_records_as_set(get_table(p, "dim_test", ts_columns=[FROM, TO]), expected) + + # load 3 — earlier boundary timestamp + # we naively apply any valid timestamp + # may lead to "valid from" > "valid to", as in this test case + current_time["ts"] = pendulum.datetime(2024, 8, 22, 0, 0, 1, tz="UTC").timestamp() + dim_snap = [l2_1] # natural key 3 no longer present + r.apply_hints( + write_disposition={ + "disposition": "merge", + "strategy": "scd2", + "boundary_timestamp": ts3, + } + ) + info = p.run(r(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 4 + expected = [ + {**{FROM: ts1_dt, TO: ts2_dt}, **l1_1}, # unchanged + {**{FROM: ts1_dt, TO: ts2_dt}, **l1_2}, # unchanged + {**{FROM: ts2_dt, TO: None}, **l2_1}, # unchanged + {**{FROM: ts2_dt, TO: ts3_dt}, **l2_3}, # retired + ] + assert_records_as_set(get_table(p, "dim_test", ts_columns=[FROM, TO]), expected) + + # invalid boundary timestamp should raise error + with pytest.raises(ValueError): + r.apply_hints( + write_disposition={ + "disposition": "merge", + "strategy": "scd2", + "boundary_timestamp": ts4, + } + ) - # invalid boundary timestamp should raise error - with pytest.raises(ValueError): + # run 4 — no boundary timestamp (use current precise_time) + current_time["ts"] = ts5 + dim_snap = [ + l3_1 := {"nk": 1, "foo": "foobar"}, # updated + ] r.apply_hints( write_disposition={ "disposition": "merge", "strategy": "scd2", - "boundary_timestamp": ts4, + "boundary_timestamp": None, } ) + info = p.run(r(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 5 + expected = [ + {**{FROM: ts1_dt, TO: ts2_dt}, **l1_1}, # unchanged + {**{FROM: ts1_dt, TO: ts2_dt}, **l1_2}, # unchanged + { + **{FROM: ts2_dt, TO: ts5_dt}, + **l2_1, + }, # retired in this run + { + **{FROM: ts2_dt, TO: ts3_dt}, + **l2_3, + }, # unchanged (already retired in load 3) + {**{FROM: ts5_dt, TO: None}, **l3_1}, # new current version + ] + assert_records_as_set(get_table(p, "dim_test", ts_columns=[FROM, TO]), expected) @pytest.mark.essential diff --git a/tests/normalize/test_json_item_inference.py b/tests/normalize/test_json_item_inference.py index e844a8e439..99a3303f72 100644 --- a/tests/normalize/test_json_item_inference.py +++ b/tests/normalize/test_json_item_inference.py @@ -610,6 +610,18 @@ def test_coerce_null_value_over_not_null(item_normalizer: JsonLItemsNormalizer) item_normalizer._coerce_row("event_user", None, row) +def _normalize_items_chunk( + root_table_name: str, items: TDataItems, item_normalizer: JsonLItemsNormalizer +) -> TSchemaUpdate: + schema_update = item_normalizer._normalize_chunk( + root_table_name=root_table_name, + items=items, + may_have_pua=False, + skip_write=True, + ) + return schema_update + + @pytest.mark.parametrize( "nested_item", [ @@ -625,69 +637,67 @@ def test_coerce_null_value_over_not_null(item_normalizer: JsonLItemsNormalizer) def test_coerce_null_value_in_nested_table( item_normalizer: JsonLItemsNormalizer, nested_item: TDataItems ) -> None: - """Ensure that a column 
previously created as a child table - does not attempt new column updates in a subsequent run when it has no values.""" - - def _normalize_items_chunk(items: TDataItems) -> TSchemaUpdate: - schema_update = item_normalizer._normalize_chunk( - root_table_name="nested", - items=items, - may_have_pua=False, - skip_write=True, - ) - return schema_update - - # use very long column names - col_name_a = "a" * (item_normalizer.naming.max_length + 1) - norm_col_name_a = item_normalizer.naming.normalize_path(col_name_a) - nested_tbl_name = item_normalizer.naming.shorten_fragments("nested", f"{norm_col_name_a}") - - col_name_b = "b" * (item_normalizer.naming.max_length + 1) - norm_col_name_b = item_normalizer.naming.normalize_path(col_name_b) - nested_nested_tbl_name = item_normalizer.naming.shorten_fragments( - "nested", f"{norm_col_name_a}", f"{norm_col_name_b}" + """Ensure that a column previously created as a nested table + does not attempt new column updates in the parent table in a subsequent run when it has no values. + """ + + # Create column names that exceed max identifier length + # to ensure that shortened names of nested tables are internally still correctly + # tracked back to column names in the respective parent tables + col_a, col_b = (ch * (item_normalizer.naming.max_length + 1) for ch in "ab") + norm_col_a, norm_col_b = (item_normalizer.naming.normalize_path(col) for col in (col_a, col_b)) + nested_tbl = item_normalizer.naming.shorten_fragments("nested", f"{norm_col_a}") + nested_nested_tbl = item_normalizer.naming.shorten_fragments( + "nested", f"{norm_col_a}", f"{norm_col_b}" ) - # create parent and child tables + # create parent and nested tables schema_update = _normalize_items_chunk( + "nested", [ { "timestamp": 82178.1298812, - col_name_a: [ + col_a: [ { "timestamp": 82178.1298812, - col_name_b: nested_item, + col_b: nested_item, } ], }, - ] + ], + item_normalizer, ) assert "nested" in schema_update - assert nested_tbl_name in schema_update - assert nested_nested_tbl_name in schema_update + assert nested_tbl in schema_update + assert nested_nested_tbl in schema_update - # verify that empty child table columns don't create schema updates + # verify that columns that have been created as nested tables, don't create + # schema updates with seen-null-first hint in parent table schema_update = _normalize_items_chunk( + "nested", [ { "timestamp": 82178.1298812, - col_name_a: [ + col_a: [ { "timestamp": 82178.1298812, - col_name_b: None, + col_b: None, } ], }, - ] + ], + item_normalizer, ) assert not schema_update schema_update = _normalize_items_chunk( + "nested", [ { "timestamp": 82178.1298812, - col_name_a: None, + col_a: None, }, - ] + ], + item_normalizer, ) assert not schema_update diff --git a/tests/pipeline/test_import_export_schema.py b/tests/pipeline/test_import_export_schema.py index 55bad3d6c7..54d5e8d3b8 100644 --- a/tests/pipeline/test_import_export_schema.py +++ b/tests/pipeline/test_import_export_schema.py @@ -1,3 +1,4 @@ +from typing import List, Any, Dict import dlt, os import pytest @@ -246,3 +247,150 @@ def nested_data(): assert "my_table__children" not in p.default_schema.tables else: assert "my_table__children" in p.default_schema.tables + + +def test_empty_column_later_becoming_child_table_removed() -> None: + """ + Test that columns with `seen-null-first` hints are properly removed + from the export schema when they become nested tables. 
+ """ + name = "schema_test" + uniq_id() + p = dlt.pipeline( + pipeline_name=name, + destination=dummy(completed_prob=1), + export_schema_path=EXPORT_SCHEMA_PATH, + ) + + # Create column names that exceed max identifier length + # to ensure that shortened names of nested tables are internally still correctly + # tracked back to column names in the respective parent tables + col_a, col_b = (ch * (p.destination.capabilities().max_identifier_length + 1) for ch in "ab") + + @dlt.resource(table_name="my_table") + def nested_data(with_grandchild: bool): + nested_example_data = EXAMPLE_DATA[0] + children_list: List[Dict[str, Any]] = [{"id": 2, "name": "Max"}] + nested_example_data[col_a] = children_list + if with_grandchild: + children_list[0][col_b] = [{"id": 3, "name": "Maximilian"}] + else: + children_list[0][col_b] = None + yield nested_example_data + + # 1. Column 'b' is null, should get 'seen-null-first' hint + p.run(nested_data(with_grandchild=False)) + + # Calculate expected table and column names + norm_col_a, norm_col_b = ( + p.default_schema.naming.shorten_fragments(col) for col in [col_a, col_b] + ) + + nested_tbl = p.default_schema.naming.shorten_fragments("my_table", f"{norm_col_a}") + nested_nested_tbl = p.default_schema.naming.shorten_fragments( + "my_table", f"{norm_col_a}", f"{norm_col_b}" + ) + + # Column 'b' should exist with 'seen-null-first' hint in the table nested_tbl_name + export_schema = _get_export_schema(name) + assert set(export_schema.tables[nested_tbl]["columns"].keys()) == { + "_dlt_list_idx", + "_dlt_parent_id", + "id", + "_dlt_id", + "name", + norm_col_b, + } + assert ( + export_schema.tables[nested_tbl]["columns"] + .get(norm_col_b)["x-normalizer"] + .get("seen-null-first", False) + ) + + # 2. Column 'b' gets complex data, should create new nested table + p.run(nested_data(with_grandchild=True)) + + # Column 'b' should be removed from the parent table nested_tbl_name + # because it now has its own nested table nested_nested_tbl_name + export_schema = _get_export_schema(name) + assert set(export_schema.tables[nested_tbl]["columns"].keys()) == { + "_dlt_list_idx", + "_dlt_parent_id", + "id", + "_dlt_id", + "name", + } + assert nested_tbl in export_schema.tables + assert nested_nested_tbl in export_schema.tables + assert nested_tbl in p.default_schema.tables + assert nested_nested_tbl in p.default_schema.tables + + +@pytest.mark.parametrize( + "use_long_col_name", + [True, False], + ids=["long_col_names", "short_col_names"], +) +def test_empty_column_later_becoming_compound_columns_removed(use_long_col_name: bool) -> None: + """ + Test that columns with `seen-null-first` hints are properly removed + from the export schema when they become compound column(s) in the same table. + """ + name = "schema_test" + uniq_id() + p = dlt.pipeline( + pipeline_name=name, + destination=dummy(completed_prob=1), + export_schema_path=EXPORT_SCHEMA_PATH, + ) + + col_a = ( + "a" * (p.destination.capabilities().max_identifier_length + 1) if use_long_col_name else "a" + ) + + # 1. 
Column 'a' is null, should get 'seen-null-first' hint + p.run([{"id": 1, col_a: None}], table_name="my_table") + + # Calculate column name + norm_col_a = p.default_schema.naming.shorten_fragments(col_a) + + # Column 'a' should exist with 'seen-null-first' hint in the table + export_schema = _get_export_schema(name) + assert set(export_schema.tables["my_table"]["columns"].keys()) == { + "id", + norm_col_a, + "_dlt_id", + "_dlt_load_id", + } + assert ( + export_schema.tables["my_table"]["columns"] + .get(norm_col_a)["x-normalizer"] + .get("seen-null-first", False) + ) + + # 2. Column 'a' gets complex data, should create compound columns + p.run([{"id": 1, col_a: {"col1": 1, "col2": "hey"}}], table_name="my_table") + + # Column 'a' should be removed from the table + # because the values were normalized to compound columns + export_schema = _get_export_schema(name) + if not use_long_col_name: + assert set(export_schema.tables["my_table"]["columns"].keys()) == { + "id", + "_dlt_id", + "_dlt_load_id", + "a__col1", + "a__col2", + } + else: + # TODO: Currently we don't properly remove columns with `seen-null-first` hint + # when they become compound column(s) in the same table if they have very long names + # because we're merely using the naming convention's path separator + # to detect whether a column is a compund column. + # See Normalize.clean_x_normalizer and JsonLItemsNormalizer._coerce_null_value + assert set(export_schema.tables["my_table"]["columns"].keys()) == { + "id", + "_dlt_id", + "_dlt_load_id", + norm_col_a, + p.default_schema.naming.shorten_fragments(norm_col_a, "col1"), + p.default_schema.naming.shorten_fragments(norm_col_a, "col2"), + } diff --git a/uv.lock b/uv.lock index 9121783d4a..6568160d1d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9.2, <3.15" resolution-markers = [ "python_full_version >= '3.14' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", @@ -51,55 +51,130 @@ wheels = [ [[package]] name = "adbc-driver-manager" -version = "1.6.0" +version = "1.8.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version < '3.10' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version < '3.10' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version < '3.10' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version < '3.10' and os_name == 'nt' and sys_platform == 'emscripten'", + "python_full_version < '3.10' and os_name != 'nt' and sys_platform == 'emscripten'", +] dependencies = [ - { name = "typing-extensions" }, + { name = "typing-extensions", marker = "python_full_version < '3.10'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/89/ed/e2b548e9ffe19a405ea4afb0679805b7da981bdc0366017cb6c826e1dae1/adbc_driver_manager-1.6.0.tar.gz", hash = "sha256:618659313a5c712f7938ab35e8f8bae1b80e9ed0c7a8582b2ec9174a88a442ba", size = 109319, upload-time = "2025-05-06T00:43:14.08Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f0/1a/e9e6d1814e7ffc9bfb75327ef1c15c66c74a2f41786cce7f685b9a1cc059/adbc_driver_manager-1.6.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = 
"sha256:d4b84cb733494bbcc7246a795c8be84fdb36dad472ec95c0fbc5c24f5df493c0", size = 385775, upload-time = "2025-05-06T00:42:08.761Z" }, - { url = "https://files.pythonhosted.org/packages/6c/dc/5aacf26256b47406f5806163d90e0c7c5fd36da00396af93c719ccd1e7b0/adbc_driver_manager-1.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fcd5b89de4ff1c5ba313d789ac3905541bd8a897e42ca6684ca19a3f47ae53f3", size = 372816, upload-time = "2025-05-06T00:42:10.19Z" }, - { url = "https://files.pythonhosted.org/packages/64/50/8dbd1860699b6adf9f095fafe48375c7e7cbf4d6f2b1d069f8dacec1dd27/adbc_driver_manager-1.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f5c66c3824512faa63b70b1bc94d74aa1a573c3c49ab3c7b0226ba5655268e4", size = 2054041, upload-time = "2025-05-06T00:42:11.661Z" }, - { url = "https://files.pythonhosted.org/packages/1c/6a/c13691815c2619a2f5cdbd0cb310d35681e3cbb6dcbec7f39df143a84be7/adbc_driver_manager-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:359dafe2ccc0f0de3f10faa0e462cb2518b9ffb495f103f7ec2b437dcff600fc", size = 2076092, upload-time = "2025-05-06T00:42:12.896Z" }, - { url = "https://files.pythonhosted.org/packages/a7/a1/f8c2d1d1fbe4973833a9a6679bac060ece23303af3b3cfaf36bbfcddb93f/adbc_driver_manager-1.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6f05c0e53d556bd5ff9fa90cc4c37733bde8a7ed0ce7359cd2836ff3c3242a6", size = 538557, upload-time = "2025-05-06T00:42:14.48Z" }, - { url = "https://files.pythonhosted.org/packages/e7/99/0f1338830a6ff886b568a29f5eec874f599c8f7550b18876410fea753ca8/adbc_driver_manager-1.6.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:09f760c7ed2ec0cb2087800b16132ed433d628e6131bbf416eea2dca89294f09", size = 386991, upload-time = "2025-05-06T00:42:15.822Z" }, - { url = "https://files.pythonhosted.org/packages/70/85/da32d443e8b7bafbec0dd6d8d4560484a1ca318060154f2de0e6e60e14c2/adbc_driver_manager-1.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0386d29c8fae0bb78f2bf50939b37b22449fdd14ea6ea4e99c491fc85257c242", size = 373667, upload-time = "2025-05-06T00:42:16.884Z" }, - { url = "https://files.pythonhosted.org/packages/cd/2b/5416197f2043001196b773c101e15ab4432ff7abeb7a7fa326ea4042380d/adbc_driver_manager-1.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c37acc9b1e1430e4a0c5435eb2d5f4302443306835ad3dafd927aa134a98127", size = 2170777, upload-time = "2025-05-06T00:42:17.981Z" }, - { url = "https://files.pythonhosted.org/packages/7e/ec/16eefe1c58cec292ab47acd984de21ebe22bd74cc63e777aa61036efddba/adbc_driver_manager-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3ade5a5d1b94ab21a0e7f61f043aa4d04ace14e8dcf70c5abd1b5a623eaa18b", size = 2180866, upload-time = "2025-05-06T00:42:19.76Z" }, - { url = "https://files.pythonhosted.org/packages/ce/80/f1eb85e3f2bf6fa4efa1cd7f5c9728728bad02ef5009b6bc86baf9d5b495/adbc_driver_manager-1.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:fc99d4dabf4441be574cb8224ea367ec1e144894f9c6076b031db45c3244f72a", size = 539877, upload-time = "2025-05-06T00:42:21.354Z" }, - { url = "https://files.pythonhosted.org/packages/25/29/e2d6459d0f502b3042d16e8e30d79c3eb137eac64dd6db0a7b02ba662bfe/adbc_driver_manager-1.6.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:e572ed22d1615034e7e90499af74ed21d624cc54c1d5ec8aa3e0ec4ca4a654f7", size = 385342, upload-time = "2025-05-06T00:42:22.498Z" }, - { url = 
"https://files.pythonhosted.org/packages/40/b8/badb83c73cfa4dfff741ba2b338c5a25480c220afba9e592b153212bf47c/adbc_driver_manager-1.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1a251977656f574d3881b5964b0611c62f252214de255f6d88a494965f15eba2", size = 370867, upload-time = "2025-05-06T00:42:23.604Z" }, - { url = "https://files.pythonhosted.org/packages/7f/bb/eee9daffd175f30e301e5f9eb233564c0535b28c324b424bd24c13516059/adbc_driver_manager-1.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3e6dd7b657029d9ef5d4fb5bc7b976be90b552c44442cd39e89eb410663db44", size = 2145818, upload-time = "2025-05-06T00:42:25.469Z" }, - { url = "https://files.pythonhosted.org/packages/11/e4/ed90877f09d1c73ff47cc120bd82296dc9ec34299313f1ed661f79578d5f/adbc_driver_manager-1.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64306174f149c3ceeb081997aa46682424a000b00eb8c2e9e8df022ccdf6f1ec", size = 2173678, upload-time = "2025-05-06T00:42:27.105Z" }, - { url = "https://files.pythonhosted.org/packages/32/38/3038af0c48b166c58d8a038d23e3b6b49c386845400eed2334c6f2b0741a/adbc_driver_manager-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:31f1857413a2f9572aba8a0236af36cc3da46a1720ea8747c62948b626010b98", size = 537249, upload-time = "2025-05-06T00:42:28.324Z" }, - { url = "https://files.pythonhosted.org/packages/7b/0a/1bd66b56514f7412fb737cf9ec38a1e32576ab6b2ed5aab74e890fb10b50/adbc_driver_manager-1.6.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:f75a65f5fb4aeac33b8b08c054335ae5a7bc5de848d7b036398bff876119cc27", size = 383339, upload-time = "2025-05-06T00:42:29.487Z" }, - { url = "https://files.pythonhosted.org/packages/18/5a/c8ad32c5d0689aae1a9fbf4acfd5605664b3d077298dc27a6e216e601691/adbc_driver_manager-1.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0a9e2be3fca404e3b78b6fafb1e61d5a08565a7815debc53d049cc5fbe0c955d", size = 368543, upload-time = "2025-05-06T00:42:30.765Z" }, - { url = "https://files.pythonhosted.org/packages/33/bb/a9e1daa66b09b33852a4e592e951a29e6ee055d88e792b64eb5761a4f011/adbc_driver_manager-1.6.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83dfde4c8d2f130be23048800117a8f3166b797d1442d74135ce7611ab26e812", size = 2141507, upload-time = "2025-05-06T00:42:32.246Z" }, - { url = "https://files.pythonhosted.org/packages/d3/49/b5e260deff3d218a17fe23a1313bb3c033d846bf74505c297f74d2c8abfe/adbc_driver_manager-1.6.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41972465fa4db46bf151cc37000d0bd29c87c2eabbc81f502f0b6932c235f213", size = 2173133, upload-time = "2025-05-06T00:42:33.933Z" }, - { url = "https://files.pythonhosted.org/packages/bf/5f/a04791038cb659c8e1e7fb4a22d75a9fd3e3109a22822bd80beea0046dc4/adbc_driver_manager-1.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:0e8ffb182fafe1e6ae12964a833700daacc55f7abfdc2ada8b5214b18108d87b", size = 535018, upload-time = "2025-05-06T00:42:35.574Z" }, - { url = "https://files.pythonhosted.org/packages/da/b7/a19fb5920cc56893ca644d2a6b855386a18753205b2f2d5f604c3a323f79/adbc_driver_manager-1.6.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:ef82b3e7661420887c04f7cef04d76a465445b8d2c20bcd2d7da6aa1a23aa7ce", size = 387069, upload-time = "2025-05-06T00:42:36.766Z" }, - { url = "https://files.pythonhosted.org/packages/bb/b4/e9c6b18b899142f70086b3fbc92e99723e2b4070a72ae9475a37fa196c82/adbc_driver_manager-1.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fc49efbdee89e98535b7692bebe79f9760b90482207812af84550878c0bf1059", 
size = 373833, upload-time = "2025-05-06T00:42:37.938Z" }, - { url = "https://files.pythonhosted.org/packages/5a/f6/0fe0910b517d7ac2ee70d89d0858b147f1148b1495280dc514beeb1d128d/adbc_driver_manager-1.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0cd224e5a29af7c63a3a8d6502c0438a05820f4cda48be6d274fe39a1236bae", size = 2056528, upload-time = "2025-05-06T00:42:39.039Z" }, - { url = "https://files.pythonhosted.org/packages/b2/11/25dede4885fc2f4f605d4ce9c2353fd17c06a6c786bdc62cac4dc97ac39c/adbc_driver_manager-1.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bab52976384586fa6ad991a9d1aff0a04871e72f300112ee80f240772498675a", size = 2075586, upload-time = "2025-05-06T00:42:40.342Z" }, - { url = "https://files.pythonhosted.org/packages/8a/7c/61de80a8f42ed511f05b6577c7082f1237fece73d55fbd7f1d9070b0d191/adbc_driver_manager-1.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:bb28324c0276df20477a527e291b37ed611a9fad5968ab252295002179ae9a6d", size = 540165, upload-time = "2025-05-06T00:42:41.604Z" }, -] - -[[package]] -name = "adbc-driver-postgresql" -version = "1.6.0" +sdist = { url = "https://files.pythonhosted.org/packages/55/2a/00fe4974b7d134c8d0691a87f09460d949e607e1ef65a022c665e8bde64f/adbc_driver_manager-1.8.0.tar.gz", hash = "sha256:88ca0f4d8c02fc6859629acaf0504620da17a39549e64d4098a3497f7f1eb2d0", size = 203568, upload-time = "2025-09-12T12:31:24.233Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/00/5c30fbb6c218599b9d6ee29df6e999c144f792b5790da31a23d6513bde83/adbc_driver_manager-1.8.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:fe3a1beb0f603468e3c4e7c03fccab1af584b6b606ab9707a168d17b7bab01a7", size = 533919, upload-time = "2025-09-12T12:29:40.317Z" }, + { url = "https://files.pythonhosted.org/packages/af/cc/6a0bb6c858ee8316d510b1c9d184cd348b98c4cffd212e79072bf44dd436/adbc_driver_manager-1.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a9bba93fe8bba7f8c23ad2db0e1441fcd9672f3d900c2791437ee8058bfa6a70", size = 511549, upload-time = "2025-09-12T12:29:42.263Z" }, + { url = "https://files.pythonhosted.org/packages/91/61/742daad0325a1ad97602bc12a5dadb15ac73e7b7db20f2caf0a66e87ef45/adbc_driver_manager-1.8.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18ce935cc2220b3df065dd98b049beec1c9abacd79ed6f7dfea953d9c3e9404b", size = 3023642, upload-time = "2025-09-12T12:29:44.874Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d8/02f5ce9da49961f97c3ee184f42feb8f9bf5e77c80cacc3fe42a81b11325/adbc_driver_manager-1.8.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c01c66c37e3e97d8891bb217f2d2f6c33c6cd25bf799aefcb42ed99c76a6ed36", size = 3039802, upload-time = "2025-09-12T12:29:46.576Z" }, + { url = "https://files.pythonhosted.org/packages/07/8b/affdc2ab3baf6c68b7642e0246861b1db01a28cc33245ddf2ea26dbff7cb/adbc_driver_manager-1.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:66c7d1319c78fc66f09532f21bc9baf0435a787f1db17b99c46c9a820b9c9253", size = 710628, upload-time = "2025-09-12T12:29:47.735Z" }, + { url = "https://files.pythonhosted.org/packages/4d/0c/2bb08c26a551aae886289fab8ab6d1bf03f4bef5b74632123500a2bc6662/adbc_driver_manager-1.8.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:58c10f81134bf8a528fab3848ac14447f3fe158d9fbc84197e79a24827f94f2a", size = 537727, upload-time = "2025-09-12T12:29:50.082Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/67/f2e1694875ccbc72c15c334e1ef2f4338b4cb098ba217f4e535d92d5d2f7/adbc_driver_manager-1.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f59794ae27eef7a17be5583d46b746749b3cbae5e58b0fe0f44746e8498d6f5c", size = 516680, upload-time = "2025-09-12T12:29:52.51Z" }, + { url = "https://files.pythonhosted.org/packages/f5/7d/65a41108cb3c1a87e570cf80a50ca94521f748a58780a41d61ea1d946051/adbc_driver_manager-1.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fed9a2cb46602cff67f503bbf55c6ee2e69a7e5c07a08514b5bd27a656a3e40b", size = 3103357, upload-time = "2025-09-12T12:29:55.226Z" }, + { url = "https://files.pythonhosted.org/packages/43/15/6e22524aadc7ea82c0868492cdf7e28ab30b476edd5d3d6ef29a882775ec/adbc_driver_manager-1.8.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:349fecd931e5211f00ce00d109fc80a484046fe41644aa402b97496919aa8c2a", size = 3113074, upload-time = "2025-09-12T12:29:57.453Z" }, + { url = "https://files.pythonhosted.org/packages/ca/a1/05f66007556623a7fb37af6535fe19377d2f4757bf0c94f64f350521c9dc/adbc_driver_manager-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:92105ae34a42603c7f64b4b0f2d851380c018e9c9f4e9a764a01b1b6f1fa6156", size = 712252, upload-time = "2025-09-12T12:29:59.162Z" }, + { url = "https://files.pythonhosted.org/packages/19/c7/05b5559eff9a42c53c47d86e32aa0b15bd206ef4be04f3a678da7871a8dd/adbc_driver_manager-1.8.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:0e6bbe0b026a17c69c1e7410a8df2366bb80803be0f0d8a7eed2defbed313a65", size = 537879, upload-time = "2025-09-12T12:30:00.798Z" }, + { url = "https://files.pythonhosted.org/packages/25/f0/d7ed70a28933e2c6b95455306c005d9022fc558e26e759ed65fce0537b79/adbc_driver_manager-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e5f0f89d13b8f86dc20522988caceab37085fe155ebbea4e9013a7962170011c", size = 512702, upload-time = "2025-09-12T12:30:02.543Z" }, + { url = "https://files.pythonhosted.org/packages/37/a6/fc66e7b72857589ba5cdd0dcfc388ea746ed805caf4031580b1c065481fa/adbc_driver_manager-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd11c6ecdc8119641d2a929e50c9f6ff822b322859bf08a085e7ba9d1adb399", size = 3086175, upload-time = "2025-09-12T12:30:04.491Z" }, + { url = "https://files.pythonhosted.org/packages/e7/90/4780e8cab75f11644d260a73307445254288405352a99cfb3b2889c50e80/adbc_driver_manager-1.8.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f7689b0cf30d77532189b30762e3f6a347275e57e511e885f0eba45ce40ce02c", size = 3113622, upload-time = "2025-09-12T12:30:06.665Z" }, + { url = "https://files.pythonhosted.org/packages/c5/b4/ed76afa37c344395a33d1f894dcd82b5cee2281925c235405a9078d10a29/adbc_driver_manager-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3f0454ec6fc2b5d3c3629b504ee65dbded2516412647070e26cdc9c14341ac74", size = 703323, upload-time = "2025-09-12T12:30:07.984Z" }, + { url = "https://files.pythonhosted.org/packages/56/79/76d505f43c6195920a41f812192bbd5fb1a490ade1c81fe5ba9f07a86f23/adbc_driver_manager-1.8.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:04e0676f7bd16dd7d7c403f506b7a22a542fe89f4471526c82cfd546353b125f", size = 536549, upload-time = "2025-09-12T12:30:09.513Z" }, + { url = "https://files.pythonhosted.org/packages/9f/1b/61e9badd21f0936a43692275f84dbf4baa4f39d4100042a14edbf9654a4d/adbc_driver_manager-1.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:6dddf0ae5b8d636015b1f7fc6972167c1824bd950f3ed6a178d083e89dfd322a", size = 510497, upload-time = "2025-09-12T12:30:10.837Z" }, + { url = "https://files.pythonhosted.org/packages/9c/52/501e0d11b2ba9fca1eb2698cb56ff14c94e8a1cad421a9c90c2e23edfbd8/adbc_driver_manager-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d70431e659e8e51d222fa238410085f0c13921154e0a17e9a687f7896667138f", size = 3085322, upload-time = "2025-09-12T12:30:12.893Z" }, + { url = "https://files.pythonhosted.org/packages/38/5e/0a79d48fe44cc8387221fff44dfa956c5ce6131a72f08e393748cbb090e0/adbc_driver_manager-1.8.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b4d34618a5e64e678210dfdf76704f11e09529fc221dbd576ead6c14555883d", size = 3107704, upload-time = "2025-09-12T12:30:14.861Z" }, + { url = "https://files.pythonhosted.org/packages/71/42/689194767d6ec09bb9b9216c27000ff193199c9bd7d7d5c6c5aad1bc2400/adbc_driver_manager-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:82da1442b6d786d2f87ac0f3dd0bbc7462ec90cb3316168a4db88044d470baa2", size = 702235, upload-time = "2025-09-12T12:30:24.469Z" }, + { url = "https://files.pythonhosted.org/packages/83/45/4e98be65dab4e61c9c0227c4908ab9a5db1db320eec8badfd5b253c5854b/adbc_driver_manager-1.8.0-cp313-cp313t-macosx_10_15_x86_64.whl", hash = "sha256:bc1677c06998361b5c3237d9f408b69fb23942f7157e2dd4ce515f658a60d3d4", size = 551974, upload-time = "2025-09-12T12:30:16.782Z" }, + { url = "https://files.pythonhosted.org/packages/8f/4a/c4d83125e1dc0532006b3fd3c816a2c2956dedb881a89e0cb47f4eda1bcc/adbc_driver_manager-1.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:86cb394bdd3ac298761e0ff8ceab8ad9c2f6ce5650d7f4ac7c8609bc74876929", size = 529497, upload-time = "2025-09-12T12:30:18.756Z" }, + { url = "https://files.pythonhosted.org/packages/c7/6c/d1752ed66109fe1866d9aabe0f6a930b8443d8e62d17f333a38b97b37b85/adbc_driver_manager-1.8.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1a834f2f269285d1308aa97ae6000002acdb79d70733735f16b3c9918ca88c1f", size = 3148300, upload-time = "2025-09-12T12:30:21.301Z" }, + { url = "https://files.pythonhosted.org/packages/3d/59/971e28a01382590ead8352d83a2d77b1f8beb2c4cc1b59036e1b68fd59e1/adbc_driver_manager-1.8.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8fcf38cc4b993336f49b6d1e407d4741ed1ea898f58088314005f8da7daf47db", size = 3134384, upload-time = "2025-09-12T12:30:23.252Z" }, + { url = "https://files.pythonhosted.org/packages/54/4e/0f826b68d5e0d50f8b1207514d0d17bf60663b7d51efd21f3754b5885450/adbc_driver_manager-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f954783e306ff1e1602d8390e74e00357142c382bff22ab159e8f94a95c8cfcb", size = 3082317, upload-time = "2025-09-12T12:30:26.8Z" }, + { url = "https://files.pythonhosted.org/packages/da/bf/ce5efe35be83b652e4b6059cfff48b59d648560a9dc99caac8da0a3441cd/adbc_driver_manager-1.8.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:61d5ec92af49a76345db1ae0a3890789797078b5b9948d550a47e8cfaa27cc19", size = 3089760, upload-time = "2025-09-12T12:30:28.772Z" }, + { url = "https://files.pythonhosted.org/packages/f2/b3/d3254595b61890da1dc6d44178abe10262136d20aeffae4a86d3e289371e/adbc_driver_manager-1.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4f68df12cfbffaf4bec832ed406fb6ce978fd7dba8a4e8e377c9658fcd83b6a3", size = 3147028, upload-time = "2025-09-12T12:30:30.53Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/ba/82d1f9521bc755d8d0d66eaac47032e147c2fe850eb308ba613710b27493/adbc_driver_manager-1.8.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a4402633d548e3ecdcf89a7133fd72b88a807a3c438e13bdb61ccc79d6239a65", size = 3133693, upload-time = "2025-09-12T12:30:32.357Z" }, + { url = "https://files.pythonhosted.org/packages/a5/33/5016dffbf2bdfcf181c17db5cae0f9fb4bee34605c87d1a3894e8963f888/adbc_driver_manager-1.8.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:151e21b46dedbbd48be4c7d904efd08fcdce3c1db7faff1ce32c520f3a4ed508", size = 535678, upload-time = "2025-09-12T12:30:33.87Z" }, + { url = "https://files.pythonhosted.org/packages/41/08/d089492c2df0d66f87c16a4223f98cd9e04571c55ba3d2147c25ef6f9d57/adbc_driver_manager-1.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a1c839a4b8c7a19d56bc0592596b123ecbdf6e76e28c7db28e562b6ce47f67cf", size = 512661, upload-time = "2025-09-12T12:30:35.604Z" }, + { url = "https://files.pythonhosted.org/packages/5c/56/5024e4da87544d4cf04df4c1f8231c9e91b9b818dd5fc208a5944455dafc/adbc_driver_manager-1.8.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eda25c53cec08290ba8c58f18dbec07ff21b0480e5e0641acc2410f79e477031", size = 3020784, upload-time = "2025-09-12T12:30:37.58Z" }, + { url = "https://files.pythonhosted.org/packages/66/22/d299a8a6aa0a51eecbe0c052aa457c24fbd499c9c096de889c40e7fb1a46/adbc_driver_manager-1.8.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c0d7fedaec1ecc1079c19eb0b55bd28e10f68f5c76fd523a37498588b7450ecf", size = 3037489, upload-time = "2025-09-12T12:30:39.838Z" }, + { url = "https://files.pythonhosted.org/packages/e3/37/ab055f5680f7b9dc2019303526f13c1db6a844d03fbaaa36cd36baa2348c/adbc_driver_manager-1.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:07188498dec41bd93753a2ad568dbca779e83f56a4e0339dbfc9cf75bc2e5f01", size = 712651, upload-time = "2025-09-12T12:30:41.658Z" }, +] + +[[package]] +name = "adbc-driver-manager" +version = "1.9.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version >= '3.14' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version >= '3.14' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version >= '3.14' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version >= '3.14' and os_name == 'nt' and sys_platform == 'emscripten'", + "python_full_version >= '3.14' and os_name != 'nt' and sys_platform == 'emscripten'", + "python_full_version == '3.13.*' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.13.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.12.*' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.11.*' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.10.*' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.12.*' and os_name != 
'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.11.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.10.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.13.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.13.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.12.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.11.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.10.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.12.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.11.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.10.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", + "python_full_version == '3.13.*' and os_name == 'nt' and sys_platform == 'emscripten'", + "python_full_version == '3.13.*' and os_name != 'nt' and sys_platform == 'emscripten'", + "python_full_version == '3.12.*' and os_name == 'nt' and sys_platform == 'emscripten'", + "python_full_version == '3.11.*' and os_name == 'nt' and sys_platform == 'emscripten'", + "python_full_version == '3.10.*' and os_name == 'nt' and sys_platform == 'emscripten'", + "python_full_version == '3.12.*' and os_name != 'nt' and sys_platform == 'emscripten'", + "python_full_version == '3.11.*' and os_name != 'nt' and sys_platform == 'emscripten'", + "python_full_version == '3.10.*' and os_name != 'nt' and sys_platform == 'emscripten'", +] dependencies = [ - { name = "adbc-driver-manager" }, - { name = "importlib-resources" }, + { name = "typing-extensions", marker = "python_full_version >= '3.10'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bf/87/ee97f199ebf0367bb93bab50f7bb4a23f723956e5869871ec51c7530bc2c/adbc_driver_postgresql-1.6.0.tar.gz", hash = "sha256:531b34ee2eb7c17ad5b3a791f05742d5e7c24725671fc8348be2ff48c0bdaf29", size = 18882, upload-time = "2025-05-06T00:43:15.001Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/30/570d951dd918fdc7cbe57add0f68c9e76d71caf3f8d07d4f6b435735ba1e/adbc_driver_postgresql-1.6.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:a82803bcc7a73967bd08f2e70334b87706ef6dc7895606f7d5d7357df18789de", size = 2688571, upload-time = "2025-05-06T00:42:43.211Z" }, - { url = "https://files.pythonhosted.org/packages/7e/79/c3066a810938cf00a8566b4c89d6dcf6e0f7aaae230112eaf2dc71c643f3/adbc_driver_postgresql-1.6.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8d72dcf359b98c173e316e9e3761011bea7a67cb41a2d678b66c6834ccad809d", size = 3002077, upload-time = "2025-05-06T00:42:44.688Z" }, - { url = "https://files.pythonhosted.org/packages/16/c0/5967e0d47cf6e27474f630974b044c59e429103daf1a41031ec075013b02/adbc_driver_postgresql-1.6.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a49a88d1d55c347c26e1de1199896b91f7bf581b6efd398e6c7241fa70a3fd20", size = 3192914, upload-time = "2025-05-06T00:42:46.511Z" }, - { url = "https://files.pythonhosted.org/packages/c1/45/2d3514fd2bf7ee7e1e28c45fdb70f2a27db2f8cc8af7e7c28f846dd7b99d/adbc_driver_postgresql-1.6.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3d09a688fb95bbe5431b488431145d77b3e20f8adbc413b1c2530b6623a9a2a", size = 2849841, upload-time = "2025-05-06T00:42:47.944Z" }, - { url = "https://files.pythonhosted.org/packages/51/26/b06d767a03fec4b0bd77aaaff6ea40de7c98d9e2b52aa646d125f0644298/adbc_driver_postgresql-1.6.0-py3-none-win_amd64.whl", hash = "sha256:23823cc70f8194c61e04b879c6ad6bae4871e1527b727b897cbc3039b6e4b892", size = 2858544, upload-time = "2025-05-06T00:42:49.804Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/cd/b4/09a85ca2bb2ba53d6577745a0aae0766393b69d0ae1e645ff4d34bee6866/adbc_driver_manager-1.9.0.tar.gz", hash = "sha256:d6687acf57f92e469e78d53df6baf70ab62f8886ba8f2e0b25613aecd1807ae9", size = 205762, upload-time = "2025-11-07T01:46:55.953Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/8e/2ee9e6364c9bb0da77d2c84c94aad06aa400d71076bb74089baaf9d8f970/adbc_driver_manager-1.9.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:4d85d3ad6a669c62aeb8048d02507912d3265b3655878b2c62b7f17b2df85d31", size = 535485, upload-time = "2025-11-07T01:45:13.071Z" }, + { url = "https://files.pythonhosted.org/packages/b4/79/b429127767c55d72e1ee1a38649485d46efeec0e8068d8f0d88674b7fb20/adbc_driver_manager-1.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e125e78fe085742113007c0d73f7470921d42e515a577391fad9140278029b54", size = 516545, upload-time = "2025-11-07T01:45:15.208Z" }, + { url = "https://files.pythonhosted.org/packages/0e/bd/3d0d7a73706e5c990c09e7563a704d8c57289f2ca3ec7d9f26e50f5b39b6/adbc_driver_manager-1.9.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:acc54eb396010370d66900f8571c5820d7eff58f53ead32b8789bb1b2ae90d43", size = 3046392, upload-time = "2025-11-07T01:45:17.285Z" }, + { url = "https://files.pythonhosted.org/packages/71/12/6733250602e938a2f53951aa1c457ceea525f6a6fca174001d7b2e639cab/adbc_driver_manager-1.9.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9ee06ce6a838fa4aa7d15f1f25e889b73bf01223a7c16c62db0a5fc393a34e8c", size = 3061274, upload-time = "2025-11-07T01:45:20.05Z" }, + { url = "https://files.pythonhosted.org/packages/8c/61/2123a83eb0bff26b7b5d278fdc309fea2e3a93397ff98957fbc253f123c6/adbc_driver_manager-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:c1c3c67d36e3040ffc1b0222e5954bec2b3226683fc9211780fbb24b54182ef5", size = 714551, upload-time = "2025-11-07T01:45:21.685Z" }, + { url = "https://files.pythonhosted.org/packages/71/6a/6ba149d36fbb0885c9e1e083bf2a111379c3e9823a73c76e85d5c178a94c/adbc_driver_manager-1.9.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5298667754dfe86ea8b8d46b4f120642ebd977dab825e033d68472a16067edb5", size = 539603, upload-time = "2025-11-07T01:45:23.511Z" }, + { url = "https://files.pythonhosted.org/packages/16/2b/f4eaa3c05a83118e35f3e9fe3c4f3a2ff7941f0b442d47391576a823ca0e/adbc_driver_manager-1.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:41b070bfb3a8d730cfb5bd8ea83f6d5131c8b00f8d74acc2ef70f885f82ab211", size = 521199, upload-time = "2025-11-07T01:45:25.761Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/a7/424d9741a9b9626db37f31a94d6f06a9723b376cdb86da974947bf53b8d3/adbc_driver_manager-1.9.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ef286238ce7e4e62fae704c77c571da2163de5367b5777bccff012a5d2a1eba2", size = 3125933, upload-time = "2025-11-07T01:45:28.045Z" }, + { url = "https://files.pythonhosted.org/packages/ad/3a/6328f0bc79b1a27db3aebc4ee26e0cead1fc5405a2b01cb75017ef0f8e0c/adbc_driver_manager-1.9.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3bbd24ad6e4562d822ddde595ccd98adab53e15341a4157e5cce3e1161f9b76e", size = 3138043, upload-time = "2025-11-07T01:45:30.082Z" }, + { url = "https://files.pythonhosted.org/packages/d4/18/2841b3046f866f6a20303c28e4f3e829513874f24d178b709823e75feb68/adbc_driver_manager-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:4c8e15e75671ae37e20d198836c9cd2aa69c85feec541ab3a15fd6b48c2078d8", size = 715975, upload-time = "2025-11-07T01:45:31.602Z" }, + { url = "https://files.pythonhosted.org/packages/ae/5d/6fbb1fe6b55542e4c2c9deafdc79ffc87da9484703f5a74e700133459f3b/adbc_driver_manager-1.9.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:c27bec486a217586ef2adbf55634070be4f25b0e3642380ebc97a2926c228d6a", size = 539512, upload-time = "2025-11-07T01:45:33.65Z" }, + { url = "https://files.pythonhosted.org/packages/87/5e/cb94f25f41e3be7eea065dedcb67ab71266c02b835966f9f417ba3f386d6/adbc_driver_manager-1.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2d0051e8a25e6d19afe224e46966abc645af1662d2314277d1822a2401800bac", size = 517792, upload-time = "2025-11-07T01:45:35.351Z" }, + { url = "https://files.pythonhosted.org/packages/9a/b6/eac3aaef68fb102fc2b7a7213d0ffe736f7463bd37838110c93764bb5644/adbc_driver_manager-1.9.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e541dda28a9a7e6ac0e9ab3797e8e31153dcc75e333759cb519bcf942805a625", size = 3108751, upload-time = "2025-11-07T01:45:37.863Z" }, + { url = "https://files.pythonhosted.org/packages/64/18/2ddfa947dc6948c68349b22e59bb2f7786177046276a2cd4899a8dd8536b/adbc_driver_manager-1.9.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:df26073ada6069e95deb59f974cfc7aa4bf94f76080c997b1d6225159dd67227", size = 3135707, upload-time = "2025-11-07T01:45:39.72Z" }, + { url = "https://files.pythonhosted.org/packages/b0/46/532aeee27008928baad91b0420b498548a2ed6b7070de907e4d5b5937933/adbc_driver_manager-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:ee7dd922983f44e2933d0e539524d5174bf50f663fb09f70989ce5e1319a1d41", size = 707465, upload-time = "2025-11-07T01:45:40.97Z" }, + { url = "https://files.pythonhosted.org/packages/d9/e7/17a9738b8e6f549606f049f616b01b9517288022bf384cf6f2ef3127288c/adbc_driver_manager-1.9.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:7f9db5a3b38d4b9042b97dbee3c739b0a075585fbe2c11cdd60ba817a1618b03", size = 537614, upload-time = "2025-11-07T01:45:42.433Z" }, + { url = "https://files.pythonhosted.org/packages/ee/de/ae8d9a532dba87b61201bf64678b142afb07782c014539cef81681f0fa35/adbc_driver_manager-1.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fe4affd71f7facdaf4c04839846b090c945586c36d0fd79cd7fb5a3251d36f85", size = 516016, upload-time = "2025-11-07T01:45:44.195Z" }, + { url = "https://files.pythonhosted.org/packages/a5/e7/c3039fd725636d1d25c339223027a7f999ac23048d4437ab5ad22f42ecdd/adbc_driver_manager-1.9.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:0a7961db77730ce0ab326a225dc534b43d4017b28a30703eaac7e3cdf1e5fb7d", size = 3108213, upload-time = "2025-11-07T01:45:46.27Z" }, + { url = "https://files.pythonhosted.org/packages/e8/74/60407d06a3263155495e730e8fa671db976c918aea16b5471de16025b811/adbc_driver_manager-1.9.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f1a8ba1ad7c4b50f5486401cc1bf04d313f74065f0d752ab0145292ab746f0fb", size = 3131425, upload-time = "2025-11-07T01:45:48.312Z" }, + { url = "https://files.pythonhosted.org/packages/35/28/260053893ca51b1f06e2fe652ee267cbfe359a2c4c8312dc0307a0f54579/adbc_driver_manager-1.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:21e0ab1adb0c804955279b0a919ba8dec5264db1e85e43a89af9fbbf8a8e52c9", size = 706437, upload-time = "2025-11-07T01:45:57.327Z" }, + { url = "https://files.pythonhosted.org/packages/30/79/4b478c9023c772ce36fb0dabc6bb49e8fe513e53a606e4bb6819589ddb54/adbc_driver_manager-1.9.0-cp313-cp313t-macosx_10_15_x86_64.whl", hash = "sha256:74f57cee4168ce79babc8c33468bc8eae6917aefc72f2493b0c24164c9a4c29b", size = 552602, upload-time = "2025-11-07T01:45:50.348Z" }, + { url = "https://files.pythonhosted.org/packages/60/56/0613a0a4b307a2bba587fa2b6bd573c0aa433696134272f3ac9b98f38746/adbc_driver_manager-1.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0dbda004d4cc4354bb14bed27451e248022298bd898b246b884a46fc880a93da", size = 533874, upload-time = "2025-11-07T01:45:51.696Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5c/d0a1cfe1b27d982ab3a2dfb1a56482d5ed3e601d2d47a6b20cbcc172fd30/adbc_driver_manager-1.9.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5377949e913483792ae30fac86be04ba377e4be12023d4db709c7eb870a196c1", size = 3170532, upload-time = "2025-11-07T01:45:54.039Z" }, + { url = "https://files.pythonhosted.org/packages/f6/26/e7ebccfce5b77c84386975d42f87fa2debd11dded7350ef493cccbb8c0bf/adbc_driver_manager-1.9.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:da8e2611c657599936968774ec39919c9edbbea6cffc4484c23df5bcb6657e08", size = 3160151, upload-time = "2025-11-07T01:45:55.797Z" }, + { url = "https://files.pythonhosted.org/packages/29/54/d39e9cec6518e734d3390e751ef2f6fd98dbca776809d8739cda23b98408/adbc_driver_manager-1.9.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:36707df1bbe577d9e625186c933a4b4b3db0e16332cdb67b89af1de699029748", size = 537297, upload-time = "2025-11-07T01:45:58.737Z" }, + { url = "https://files.pythonhosted.org/packages/5d/e2/c1ddf128b669f336ee42fea6d275ebfaf732cefb0ba1e37a2fb46c70c0fe/adbc_driver_manager-1.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:88fb2f5f6a9934065f08241979bfd9442b797bb290dc8930d4f75e2568f72096", size = 516607, upload-time = "2025-11-07T01:46:00.538Z" }, + { url = "https://files.pythonhosted.org/packages/4e/3a/f2c34724f8244e316cbb9a550e66c8afb786c72be0eef1e6e0f0a4943ddb/adbc_driver_manager-1.9.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:843bbcc78638cd408524c5474d8c83817142bfa5ad49693c056c7cf3d87b8e14", size = 3104038, upload-time = "2025-11-07T01:46:02.42Z" }, + { url = "https://files.pythonhosted.org/packages/5f/ba/9bca6e811d0196d0070b19cee8f1019bfb250d494f98d79445ed8fd22fd6/adbc_driver_manager-1.9.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:33763cdd67ccc9574d15e1affcf3cdaf74351a93d91e6f85ed92704da732c150", size = 3111355, upload-time = "2025-11-07T01:46:03.908Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/16/6e5895b661028a94ca0c96e174e3531c9e703afbcae3fd0260956094bfa9/adbc_driver_manager-1.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:c5995b029cc99d80f2dc4bd34c41320fe8185da0fdc115d424f7699d1d7ffbac", size = 721478, upload-time = "2025-11-07T01:46:12.358Z" }, + { url = "https://files.pythonhosted.org/packages/97/d5/7c9ac5cc73e710245bed0685982c806799b8206f2f4ce39a83c902d2e313/adbc_driver_manager-1.9.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:3629bdc16eb0180c86dd17931066e1567a92f6fccd515b9e4eb5098ef906d9bf", size = 552607, upload-time = "2025-11-07T01:46:05.21Z" }, + { url = "https://files.pythonhosted.org/packages/0c/9c/f8c59509228cbdf22d40fa4182d64f9e93c02307f028b0ae218973543fe0/adbc_driver_manager-1.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d68b9c6c91ac97919ecc9255ccefda020fb6784b6aeb43d6d7b8a3509dc534f5", size = 533799, upload-time = "2025-11-07T01:46:06.726Z" }, + { url = "https://files.pythonhosted.org/packages/73/ee/ed402b9766ae3fe1b699965157720cf95222d86d152f766e5eff34d18599/adbc_driver_manager-1.9.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:12d012ebb18f46037dc36079222449dc4f2f9b8a5df753a0653a35ff211fefa2", size = 3170551, upload-time = "2025-11-07T01:46:08.745Z" }, + { url = "https://files.pythonhosted.org/packages/e3/5a/c7377ae2f3d4518b6e28e1a01e279e2ac060887c02a09911aba3b34f1ee1/adbc_driver_manager-1.9.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:af252317c41aaea1de9ec40bfbaf300c773dfb2bd4445aa819b118349e84ebd4", size = 3159071, upload-time = "2025-11-07T01:46:10.743Z" }, ] [[package]] @@ -1633,6 +1708,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/91/66065d933b4814295fd0ddc16a66ef193dff14bf8d15895723f38640a3ab/db_dtypes-1.4.3-py3-none-any.whl", hash = "sha256:a1c92b819af947fae1701d80a71f2a0eac08f825ca52cf0c68aeba80577ae966", size = 18110, upload-time = "2025-05-12T13:54:20.146Z" }, ] +[[package]] +name = "dbc" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/53/d9cbbbd55ea9cf3028f11870f1f25cc3d920836f0090de02632088f883b2/dbc-0.1.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:9469e30a81b2350901259d6a6b53d1cb579692f534a5d077d294a83a7606f397", size = 3511476, upload-time = "2025-10-27T19:35:51.847Z" }, + { url = "https://files.pythonhosted.org/packages/2a/f6/8d0db43cd40506c458bf8533b705ccb04f2c6fe731d74cb8c853f540820c/dbc-0.1.0-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:d627f64b1ada1abdc20c583e778ca7f8c3a2aa738e77fa49bfa70cff41808d19", size = 3854094, upload-time = "2025-10-27T19:35:53.871Z" }, + { url = "https://files.pythonhosted.org/packages/a0/33/54c71920dc2f0cee080363173fe7cdbe008e68ea0d2efbca5a7631522727/dbc-0.1.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1ccc979125118d489d769fc1b6852e0aac098737b500e9f733cfdc82e6555550", size = 3686259, upload-time = "2025-10-27T19:35:55.57Z" }, + { url = "https://files.pythonhosted.org/packages/2d/0d/585b826793452a72b098f5741939d4a1592c2dfc2181800122356d8a487b/dbc-0.1.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52c17aa305a3b9580d055504f2c827217ef341e8b8662ec9a5f1dff18ddb39e0", size = 3353493, upload-time = "2025-10-27T19:35:56.86Z" }, + { url = "https://files.pythonhosted.org/packages/f4/08/501d584a97201499d62bf58f6b6b919b36e38e50cff311aeb7935aee1699/dbc-0.1.0-py3-none-win_amd64.whl", hash = 
"sha256:8ca305c1b3ff5d869f1084a95c8f538caf44580c700e788c7222ff52dfde1da3", size = 3796342, upload-time = "2025-10-27T19:35:58.5Z" }, +] + [[package]] name = "dbt-athena-community" version = "1.7.2" @@ -1929,8 +2016,8 @@ dependencies = [ { name = "giturlparse" }, { name = "humanize" }, { name = "jsonpath-ng" }, - { name = "orjson", version = "3.10.18", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and python_full_version < '3.14' and os_name != 'nt' and sys_platform != 'emscripten') or (python_full_version >= '3.11' and python_full_version < '3.14' and os_name == 'nt' and sys_platform != 'emscripten')" }, - { name = "orjson", version = "3.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' or (python_full_version < '3.10' and os_name != 'nt' and sys_platform != 'emscripten') or (python_full_version < '3.11' and os_name == 'nt' and sys_platform != 'emscripten')" }, + { name = "orjson", version = "3.10.18", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and python_full_version < '3.14' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten') or (python_full_version >= '3.11' and python_full_version < '3.14' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten')" }, + { name = "orjson", version = "3.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' or (python_full_version < '3.10' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten') or (python_full_version < '3.11' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten')" }, { name = "packaging" }, { name = "pathvalidate" }, { name = "pendulum" }, @@ -2093,7 +2180,9 @@ workspace = [ [package.dev-dependencies] adbc = [ - { name = "adbc-driver-postgresql" }, + { name = "adbc-driver-manager", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "adbc-driver-manager", version = "1.9.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "dbc" }, ] airflow = [ { name = "apache-airflow", marker = "python_full_version < '3.12'" }, @@ -2234,8 +2323,8 @@ requires-dist = [ { name = "marimo", marker = "extra == 'workspace'", specifier = ">=0.14.5" }, { name = "mcp", marker = "python_full_version >= '3.10' and extra == 'workspace'", specifier = ">=1.2.1" }, { name = "orjson", marker = "python_full_version >= '3.14'", specifier = ">=3.11.0" }, - { name = "orjson", marker = "sys_platform != 'emscripten'", specifier = ">=3.10.1" }, { name = "orjson", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", specifier = ">=3.6.7,!=3.9.11,!=3.9.12,!=3.9.13,!=3.9.14,!=3.10.1,<4" }, + { name = "orjson", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", specifier = ">=3.10.1" }, { name = "packaging", specifier = ">=21.1" }, { name = "paramiko", marker = "extra == 'sftp'", specifier = ">=3.3.0" }, { name = "pathspec", marker = "extra == 'workspace'", specifier = ">=0.11.2" }, @@ -2295,7 +2384,10 @@ requires-dist = [ provides-extras = ["gcp", "bigquery", "postgres", "redshift", "parquet", "duckdb", "ducklake", "filesystem", "s3", "gs", "az", "sftp", "http", "snowflake", "motherduck", "cli", "athena", "weaviate", "mssql", 
"synapse", "qdrant", "databricks", "clickhouse", "dremio", "lancedb", "deltalake", "sql-database", "sqlalchemy", "pyiceberg", "postgis", "workspace", "dbml"] [package.metadata.requires-dev] -adbc = [{ name = "adbc-driver-postgresql", specifier = ">=1.6.0" }] +adbc = [ + { name = "adbc-driver-manager", specifier = ">=1.8.0" }, + { name = "dbc", specifier = ">=0.1.0" }, +] airflow = [{ name = "apache-airflow", marker = "python_full_version < '3.12'", specifier = ">=2.8.0,<3" }] dashboard-tests = [ { name = "playwright", specifier = ">=1.52.0,<2" }, @@ -3768,18 +3860,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/59/9b/ecce94952ab5ea74c31dcf9ccf78ccd484eebebef06019bf8cb579ab4519/importlib_metadata-6.11.0-py3-none-any.whl", hash = "sha256:f0afba6205ad8f8947c7d338b5342d5db2afbfd82f9cbef7879a9539cc12eb9b", size = 23427, upload-time = "2023-12-03T17:33:08.965Z" }, ] -[[package]] -name = "importlib-resources" -version = "6.5.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "zipp", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/cf/8c/f834fbf984f691b4f7ff60f50b514cc3de5cc08abfc3295564dd89c5e2e7/importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c", size = 44693, upload-time = "2025-01-03T18:51:56.698Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461, upload-time = "2025-01-03T18:51:54.306Z" }, -] - [[package]] name = "inflection" version = "0.5.1" @@ -5688,13 +5768,6 @@ resolution-markers = [ "python_full_version == '3.12.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", "python_full_version == '3.11.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", "python_full_version == '3.10.*' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.13.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.13.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.12.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.11.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.12.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.11.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.10.*' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", ] sdist = { url = "https://files.pythonhosted.org/packages/81/0b/fea456a3ffe74e70ba30e01ec183a9b26bec4d497f61dcfce1b601059c60/orjson-3.10.18.tar.gz", hash = "sha256:e8da3947d92123eda795b68228cafe2724815621fe35e8e320a9e9593a4bcd53", size = 5422810, upload-time = "2025-04-29T23:30:08.423Z" } wheels = [ @@ -5785,9 +5858,6 @@ resolution-markers = [ "python_full_version == '3.10.*' and os_name == 'nt' and 
platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", "python_full_version < '3.10' and os_name == 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", "python_full_version < '3.10' and os_name != 'nt' and platform_python_implementation != 'PyPy' and sys_platform != 'emscripten'", - "python_full_version == '3.10.*' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version < '3.10' and os_name == 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", - "python_full_version < '3.10' and os_name != 'nt' and platform_python_implementation == 'PyPy' and sys_platform != 'emscripten'", ] sdist = { url = "https://files.pythonhosted.org/packages/29/87/03ababa86d984952304ac8ce9fbd3a317afb4a225b9a81f9b606ac60c873/orjson-3.11.0.tar.gz", hash = "sha256:2e4c129da624f291bcc607016a99e7f04a353f6874f3bd8d9b47b88597d5f700", size = 5318246, upload-time = "2025-07-15T16:08:29.194Z" } wheels = [