Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 45 additions & 2 deletions ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,13 +934,56 @@ def unbind(self) -> ir.Table:
return self.op().replace(rule).to_expr()

def as_table(self) -> ir.Table:
"""Convert an expression to a table."""
"""Convert a Scalar, Column, or Table to a [Table](./expression-tables.qmd#ibis.expr.types.Table).

- Calling this on a Table is a no-op.
- Calling this on a Column will return a single-column table.
- Calling this on a Scalar will return a single-row, single-column table.

Returns
-------
Table
A table expression
"""
raise NotImplementedError(
f"{type(self)} expressions cannot be converted into tables"
)

def as_scalar(self) -> ir.Scalar:
"""Convert an expression to a scalar."""
"""Tell ibis to treat the expression as a scalar.

Ibis cannot know until execution time whether a Column or Table expression
contains only one row or many rows.

This method is a way to explicitly tell ibis to trust you that
this expression will only contain one row at execution time.
This allows you to use this expression with other tables.

If the expression is a literal, it will be returned as is. If it depends
on a table, it will be turned into a scalar subquery.

Returns
-------
Scalar
A scalar subquery or a literal

Examples
--------
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> max_gentoo_weight = t.filter(t.species == "Gentoo").body_mass_g.max()
>>> light_penguins = t.filter(t.body_mass_g < max_gentoo_weight / 2)
>>> light_penguins.species.value_counts().order_by("species")
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ species ┃ species_count ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ string │ int64 │
├───────────┼───────────────┤
│ Adelie │ 15 │
│ Chinstrap │ 2 │
└───────────┴───────────────┘
"""
raise NotImplementedError(
f"{type(self)} expressions cannot be converted into scalars"
)
Expand Down
42 changes: 31 additions & 11 deletions ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1483,11 +1483,12 @@ def __polars_result__(self, df: pl.DataFrame) -> Any:

return PolarsData.convert_scalar(df, self.type())

def as_scalar(self):
"""Inform ibis that the expression should be treated as a scalar.
def as_scalar(self) -> Scalar:
"""Tell ibis to treat this expression as a single scalar value.

If the expression is a literal, it will be returned as is. If it depends
on a table, it will be turned to a scalar subquery.
If the expression is a literal, it will be returned as is.
If it depends on a table, e.g. an aggregation of a column,
it will be turned into a scalar subquery.

Returns
-------
Expand Down Expand Up @@ -1518,7 +1519,7 @@ def as_scalar(self):
return self

def as_table(self) -> ir.Table:
"""Promote the scalar expression to a table.
"""Promote the expression to a [Table](./expression-tables.qmd#ibis.expr.types.Table).

Returns
-------
Expand Down Expand Up @@ -1694,14 +1695,16 @@ def __polars_result__(self, df: pl.DataFrame) -> pl.Series:
return PolarsData.convert_column(df, self.type())

def as_scalar(self) -> Scalar:
"""Inform ibis that the expression should be treated as a scalar.
"""Tell ibis to treat this Column as a scalar.

Ibis cannot know until execution time whether a column expression
contains only one value or many values.

Creates a scalar subquery from the column expression. Since ibis cannot
be sure that the column expression contains only one value, the column
expression is wrapped in a scalar subquery and treated as a scalar.
This method is a way to explicitly tell ibis to trust you that
this column expression will only contain one value at execution time.
This allows you to use this column expression with other tables.

Note that the execution of the scalar subquery will fail if the column
expression contains more than one value.
Note that execution will fail if the column DOES contain more than one value.

Returns
-------
Expand All @@ -1714,6 +1717,23 @@ def as_scalar(self) -> Scalar:
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> heavy_gentoo = t.filter(t.species == "Gentoo", t.body_mass_g > 6200)

We know from domain knowledge that there is only one Gentoo penguin
with body mass greater than 6200g, so heavy_gentoo is a table with
exactly one row.

If we try to use `t.island == heavy_gentoo.island` directly in a filter,
we will get an error because we are trying to compare columns from two tables
to each other, which we don't know how to align without a specific join:

>>> t.filter(t.island == heavy_gentoo.island) # quartodoc: +EXPECTED_FAILURE
Traceback (most recent call last):
...
_duckdb.BinderException: Binder Error: Referenced table "t1" not found!
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently this errors at execution time. Is it possible for us to make this instead error at expression construction time?


Instead, we should use `as_scalar()` to tell ibis that we know
`heavy_gentoo.island` contains exactly one value:

>>> from_that_island = t.filter(t.island == heavy_gentoo.island.as_scalar())
>>> from_that_island.species.value_counts().order_by("species")
┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓
Expand Down
29 changes: 24 additions & 5 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,12 +681,16 @@ def bind(self, *args: Any, **kwargs: Any) -> tuple[Value, ...]:
)

def as_scalar(self) -> ir.Scalar:
"""Inform ibis that the table expression should be treated as a scalar.
"""Tell ibis to treat this table as a 1-row, 1-column table, referred to as a *scalar*.

Note that the table must have exactly one column and one row for this to
work. If the table has more than one column an error will be raised in
expression construction time. If the table has more than one row an
error will be raised by the backend when the expression is executed.
Ibis cannot know until execution time whether a table expression
contains only one row or many rows.

This method is a way to explicitly tell ibis to trust you that
this table expression will only contain one row at execution time.
This allows you to use this table expression with other tables.

Note that execution will fail if the table DOES contain more than one row.

Returns
-------
Expand All @@ -699,6 +703,21 @@ def as_scalar(self) -> ir.Scalar:
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> heavy_gentoo = t.filter(t.species == "Gentoo", t.body_mass_g > 6200)

We know from domain knowledge that there is only one Gentoo penguin
with body mass greater than 6200g, so heavy_gentoo is a table with
exactly one row.

Say we want to get all penguins from the same island as that
heavy Gentoo penguin.

If we try to use `t.island == heavy_gentoo.select("island")` directly in a filter,
we won't get the result we expect because that will evaluate to a literal
`False` (since of course an ibis.Column is never equal to an ibis.Table).

Instead, we should use `as_scalar()` to tell ibis that we know
`heavy_gentoo.select("island")` contains exactly one row:

>>> from_that_island = t.filter(t.island == heavy_gentoo.select("island").as_scalar())
>>> from_that_island.species.value_counts().order_by("species")
┏━━━━━━━━━┳━━━━━━━━━━━━━━━┓
Expand Down