Mise à jour de Monitor.py et autres scripts

This commit is contained in:
Debian
2025-07-23 10:46:27 +02:00
parent 7081418ce0
commit 7de3e0fb50
8604 changed files with 2789953 additions and 295 deletions

View File

@@ -0,0 +1,409 @@
from __future__ import annotations
import operator
from typing import TYPE_CHECKING, Any, Literal, cast
import ibis
import ibis.expr.types as ir
from narwhals._ibis.utils import evaluate_exprs, native_to_narwhals_dtype
from narwhals._utils import (
Implementation,
ValidateBackendVersion,
Version,
not_implemented,
parse_columns_to_drop,
)
from narwhals.exceptions import ColumnNotFoundError, InvalidOperationError
from narwhals.typing import CompliantLazyFrame
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Mapping, Sequence
from types import ModuleType
import pandas as pd
import pyarrow as pa
from ibis.expr.operations import Binary
from typing_extensions import Self, TypeAlias, TypeIs
from narwhals._compliant.typing import CompliantDataFrameAny
from narwhals._ibis.expr import IbisExpr
from narwhals._ibis.group_by import IbisGroupBy
from narwhals._ibis.namespace import IbisNamespace
from narwhals._ibis.series import IbisInterchangeSeries
from narwhals._utils import _LimitedContext
from narwhals.dataframe import LazyFrame
from narwhals.dtypes import DType
from narwhals.stable.v1 import DataFrame as DataFrameV1
from narwhals.typing import AsofJoinStrategy, JoinStrategy, LazyUniqueKeepStrategy
JoinPredicates: TypeAlias = "Sequence[ir.BooleanColumn] | Sequence[str]"
class IbisLazyFrame(
    CompliantLazyFrame[
        "IbisExpr", "ir.Table", "LazyFrame[ir.Table] | DataFrameV1[ir.Table]"
    ],
    ValidateBackendVersion,
):
    """Lazy-frame adapter around a native Ibis table (`ir.Table`).

    Transforming methods build a new Ibis table expression and return it wrapped
    in a new `IbisLazyFrame` (see `_with_native`) instead of modifying `self`.
    """

    _implementation = Implementation.IBIS

    def __init__(
        self, df: ir.Table, *, version: Version, validate_backend_version: bool = False
    ) -> None:
        """Wrap the native Ibis table `df`.

        Arguments:
            df: Native Ibis table to wrap.
            version: Narwhals API version this frame is bound to.
            validate_backend_version: If true, call `_validate_backend_version()`
                after the attributes have been set.
        """
        self._native_frame: ir.Table = df
        self._version = version
        # Lazily-populated caches, filled by the `schema` / `columns` properties.
        self._cached_schema: dict[str, DType] | None = None
        self._cached_columns: list[str] | None = None
        if validate_backend_version:
            self._validate_backend_version()

    @staticmethod
    def _is_native(obj: ir.Table | Any) -> TypeIs[ir.Table]:
        """Return whether `obj` is an Ibis table."""
        return isinstance(obj, ir.Table)

    @classmethod
    def from_native(cls, data: ir.Table, /, *, context: _LimitedContext) -> Self:
        """Wrap `data`, taking the narwhals version from `context`."""
        return cls(data, version=context._version)

    def to_narwhals(self) -> LazyFrame[ir.Table] | DataFrameV1[ir.Table]:
        """Return the public narwhals object for this frame.

        For `Version.V1` this is a stable-v1 `DataFrame` at the "interchange"
        level; for any other version it is `self._version.lazyframe(...)` at the
        "lazy" level.
        """
        if self._version is Version.V1:
            from narwhals.stable.v1 import DataFrame

            return DataFrame(self, level="interchange")
        return self._version.lazyframe(self, level="lazy")

    def __narwhals_dataframe__(self) -> Self:  # pragma: no cover
        """Return `self` for `Version.V1`; raise `AttributeError` otherwise."""
        # Keep around for backcompat.
        if self._version is not Version.V1:
            msg = "__narwhals_dataframe__ is not implemented for IbisLazyFrame"
            raise AttributeError(msg)
        return self

    def __narwhals_lazyframe__(self) -> Self:
        """Return `self`."""
        return self

    def __native_namespace__(self) -> ModuleType:
        """Return the `ibis` module."""
        return ibis

    def __narwhals_namespace__(self) -> IbisNamespace:
        """Return an `IbisNamespace` bound to this frame's version."""
        from narwhals._ibis.namespace import IbisNamespace

        return IbisNamespace(version=self._version)

    def get_column(self, name: str) -> IbisInterchangeSeries:
        """Return column `name` as an `IbisInterchangeSeries`."""
        from narwhals._ibis.series import IbisInterchangeSeries

        return IbisInterchangeSeries(self.native.select(name), version=self._version)

    def _iter_columns(self) -> Iterator[ir.Expr]:
        """Yield the native column expression for each name in `self.columns`."""
        for name in self.columns:
            yield self.native[name]

    def collect(
        self, backend: ModuleType | Implementation | str | None, **kwargs: Any
    ) -> CompliantDataFrameAny:
        """Materialise the table into an eager compliant dataframe.

        Arguments:
            backend: `None` or `Implementation.PYARROW` collect via
                `to_pyarrow()`, `Implementation.PANDAS` via `to_pandas()` and
                `Implementation.POLARS` via `to_polars()`.
            **kwargs: Accepted but not used by this implementation.

        Raises:
            ValueError: If `backend` is none of the values listed above.
        """
        if backend is None or backend is Implementation.PYARROW:
            from narwhals._arrow.dataframe import ArrowDataFrame

            return ArrowDataFrame(
                self.native.to_pyarrow(),
                validate_backend_version=True,
                version=self._version,
                validate_column_names=True,
            )

        if backend is Implementation.PANDAS:
            from narwhals._pandas_like.dataframe import PandasLikeDataFrame

            return PandasLikeDataFrame(
                self.native.to_pandas(),
                implementation=Implementation.PANDAS,
                validate_backend_version=True,
                version=self._version,
                validate_column_names=True,
            )

        if backend is Implementation.POLARS:
            from narwhals._polars.dataframe import PolarsDataFrame

            return PolarsDataFrame(
                self.native.to_polars(),
                validate_backend_version=True,
                version=self._version,
            )

        msg = f"Unsupported `backend` value: {backend}"  # pragma: no cover
        raise ValueError(msg)  # pragma: no cover

    def head(self, n: int) -> Self:
        """Return a frame wrapping `self.native.head(n)`."""
        return self._with_native(self.native.head(n))

    def simple_select(self, *column_names: str) -> Self:
        """Select columns by name, without evaluating expressions."""
        return self._with_native(self.native.select(*column_names))

    def aggregate(self, *exprs: IbisExpr) -> Self:
        """Evaluate `exprs`, name each result, and pass them to `Table.aggregate`."""
        selection = [
            cast("ir.Scalar", val.name(name))
            for name, val in evaluate_exprs(self, *exprs)
        ]
        return self._with_native(self.native.aggregate(selection))

    def select(self, *exprs: IbisExpr) -> Self:
        """Evaluate `exprs` and select the named results.

        Raises:
            ValueError: If the expressions evaluate to no columns at all.
        """
        selection = [val.name(name) for name, val in evaluate_exprs(self, *exprs)]
        if not selection:
            msg = "At least one expression must be provided to `select` with the Ibis backend."
            raise ValueError(msg)

        t = self.native.select(*selection)
        return self._with_native(t)

    def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
        """Return a frame without `columns`.

        Which names are actually dropped (and how `strict` is honoured) is
        decided by `parse_columns_to_drop`; the remaining columns keep their
        original order.
        """
        # Build the set once so each membership test below is O(1).
        dropped = set(parse_columns_to_drop(self, columns, strict=strict))
        selection = (col for col in self.columns if col not in dropped)
        return self._with_native(self.native.select(*selection))

    def lazy(self, *, backend: Implementation | None = None) -> Self:
        """Return `self`; raise `ValueError` if `backend` is given."""
        # The `backend` argument has no effect but we keep it here for
        # backwards compatibility because in `narwhals.stable.v1`
        # function `.from_native()` will return a DataFrame for Ibis.
        if backend is not None:  # pragma: no cover
            msg = "`backend` argument is not supported for Ibis"
            raise ValueError(msg)
        return self

    def with_columns(self, *exprs: IbisExpr) -> Self:
        """Add or replace columns via `Table.mutate`, keyed by output name."""
        new_columns_map = dict(evaluate_exprs(self, *exprs))
        return self._with_native(self.native.mutate(**new_columns_map))

    def filter(self, predicate: IbisExpr) -> Self:
        """Keep the rows for which `predicate` holds."""
        # `[0]` is safe as the predicate's expression only returns a single column
        mask = cast("ir.BooleanValue", predicate(self)[0])
        return self._with_native(self.native.filter(mask))

    @property
    def schema(self) -> dict[str, DType]:
        """Mapping of column name to narwhals dtype, computed once and cached."""
        if self._cached_schema is None:
            # Note: prefer `self._cached_schema` over `functools.cached_property`
            # due to Python3.13 failures.
            # `collect_schema` holds the single implementation of the conversion.
            self._cached_schema = self.collect_schema()
        return self._cached_schema

    @property
    def columns(self) -> list[str]:
        """Column names, computed once and cached.

        Reuses the cached schema when it is already available, otherwise reads
        the names straight from the native table.
        """
        if self._cached_columns is None:
            self._cached_columns = (
                list(self.schema)
                if self._cached_schema is not None
                else list(self.native.columns)
            )
        return self._cached_columns

    def to_pandas(self) -> pd.DataFrame:
        """Execute the table and return the result of `to_pandas()`."""
        # only if version is v1, keep around for backcompat
        return self.native.to_pandas()

    def to_arrow(self) -> pa.Table:
        """Execute the table and return the result of `to_pyarrow()`."""
        # only if version is v1, keep around for backcompat
        return self.native.to_pyarrow()

    def _with_version(self, version: Version) -> Self:
        """Return a new wrapper around the same table bound to `version`."""
        return self.__class__(self.native, version=version)

    def _with_native(self, df: ir.Table) -> Self:
        """Return a new wrapper around `df` with this frame's version."""
        return self.__class__(df, version=self._version)

    def group_by(
        self, keys: Sequence[str] | Sequence[IbisExpr], *, drop_null_keys: bool
    ) -> IbisGroupBy:
        """Return an `IbisGroupBy` over `keys`."""
        from narwhals._ibis.group_by import IbisGroupBy

        return IbisGroupBy(self, keys, drop_null_keys=drop_null_keys)

    def rename(self, mapping: Mapping[str, str]) -> Self:
        """Rename columns according to `mapping`; unmapped names are kept."""

        def _rename(col: str) -> str:
            return mapping.get(col, col)

        return self._with_native(self.native.rename(_rename))

    @staticmethod
    def _join_drop_duplicate_columns(df: ir.Table, columns: Iterable[str], /) -> ir.Table:
        """Ibis adds a suffix to the right table col, even when it matches the left during a join."""
        duplicates = set(df.columns).intersection(columns)
        return df.drop(*duplicates) if duplicates else df

    def join(
        self,
        other: Self,
        *,
        how: JoinStrategy,
        left_on: Sequence[str] | None,
        right_on: Sequence[str] | None,
        suffix: str,
    ) -> Self:
        """Join this frame with `other`.

        Arguments:
            other: Right-hand frame.
            how: Join strategy; "full" is passed to Ibis as "outer".
            left_on: Key columns of this frame (unused for cross joins).
            right_on: Key columns of `other` (unused for cross joins).
            suffix: Appended to right-hand column names that Ibis renames.
        """
        how_native = "outer" if how == "full" else how
        rname = "{name}" + suffix

        if other == self:
            # Ibis does not support self-references unless created as a view
            other = self._with_native(other.native.view())

        if how_native == "cross":
            joined = self.native.join(other.native, how=how_native, rname=rname)
            return self._with_native(joined)

        # help mypy
        assert left_on is not None  # noqa: S101
        assert right_on is not None  # noqa: S101

        predicates = self._convert_predicates(other, left_on, right_on)
        joined = self.native.join(other.native, predicates, how=how_native, rname=rname)
        # NOTE(review): the clean-up below is taken to apply to left joins only;
        # confirm the nesting against the upstream file.
        if how_native == "left":
            right_names = (n + suffix for n in right_on)
            joined = self._join_drop_duplicate_columns(joined, right_names)
            it = (cast("Binary", p.op()) for p in predicates if not isinstance(p, str))
            to_drop = []
            for pred in it:
                right = pred.right.name
                # Mirrors how polars works.
                if right not in self.columns and pred.left.name != right:
                    to_drop.append(right)
            if to_drop:
                joined = joined.drop(*to_drop)

        return self._with_native(joined)

    def join_asof(
        self,
        other: Self,
        *,
        left_on: str,
        right_on: str,
        by_left: Sequence[str] | None,
        by_right: Sequence[str] | None,
        strategy: AsofJoinStrategy,
        suffix: str,
    ) -> Self:
        """As-of join with `other`.

        "backward" compares `left_on >= right_on` and "forward" compares
        `left_on <= right_on`. When both `by_left` and `by_right` are given they
        are added as extra join predicates.

        Raises:
            NotImplementedError: For any strategy other than "backward" or
                "forward".
        """
        rname = "{name}" + suffix
        strategy_op = {"backward": operator.ge, "forward": operator.le}
        predicates: JoinPredicates = []
        if op := strategy_op.get(strategy):
            on: ir.BooleanColumn = op(self.native[left_on], other.native[right_on])
        else:
            msg = "Only `backward` and `forward` strategies are currently supported for Ibis"
            raise NotImplementedError(msg)
        if by_left is not None and by_right is not None:
            predicates = self._convert_predicates(other, by_left, by_right)
        joined = self.native.asof_join(other.native, on, predicates, rname=rname)
        # Remove the suffixed copies of the right-hand key columns, if present.
        joined = self._join_drop_duplicate_columns(joined, [right_on + suffix])
        if by_right is not None:
            right_names = (n + suffix for n in by_right)
            joined = self._join_drop_duplicate_columns(joined, right_names)
        return self._with_native(joined)

    def _convert_predicates(
        self, other: Self, left_on: Sequence[str], right_on: Sequence[str]
    ) -> JoinPredicates:
        """Build join predicates for the given key columns.

        Returns `left_on` unchanged when both sides use the same names,
        otherwise one equality expression per `(left, right)` pair.
        """
        if left_on == right_on:
            return left_on
        return [
            cast("ir.BooleanColumn", (self.native[left] == other.native[right]))
            for left, right in zip(left_on, right_on)
        ]

    def collect_schema(self) -> dict[str, DType]:
        """Return a freshly built mapping of column name to narwhals dtype."""
        return {
            name: native_to_narwhals_dtype(dtype, self._version)
            for name, dtype in self.native.schema().fields.items()
        }

    def unique(
        self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
    ) -> Self:
        """Drop duplicate rows.

        Arguments:
            subset: Columns that define a duplicate. When it is falsy and
                `keep` is not "any", all columns are used.
            keep: "any" is passed to Ibis as `keep="first"`, "none" as
                `keep=None`.

        Raises:
            ColumnNotFoundError: If a requested column does not exist.
        """
        if subset_ := subset if keep == "any" else (subset or self.columns):
            # Sanitise input
            if any(x not in self.columns for x in subset_):
                msg = f"Columns {set(subset_).difference(self.columns)} not found in {self.columns}."
                raise ColumnNotFoundError(msg)

            mapped_keep: dict[str, Literal["first"] | None] = {
                "any": "first",
                "none": None,
            }
            to_keep = mapped_keep[keep]
            return self._with_native(self.native.distinct(on=subset_, keep=to_keep))
        return self._with_native(self.native.distinct(on=subset))

    def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self:
        """Sort by the columns in `by`.

        Arguments:
            *by: Column names, most significant first.
            descending: One flag for all columns, or one flag per column.
            nulls_last: Whether nulls sort after non-null values.
        """
        flags = [descending] * len(by) if isinstance(descending, bool) else descending
        # Indexing `flags` (not zipping) keeps the IndexError raised when fewer
        # flags than columns are supplied.
        sort_cols = [
            cast(
                "ir.Column",
                (ibis.desc if flags[idx] else ibis.asc)(
                    name, nulls_first=not nulls_last
                ),
            )
            for idx, name in enumerate(by)
        ]
        return self._with_native(self.native.order_by(*sort_cols))

    def drop_nulls(self, subset: Sequence[str] | None) -> Self:
        """Drop rows with nulls in `subset` (all columns when `subset` is None)."""
        subset_ = subset if subset is not None else self.columns
        return self._with_native(self.native.drop_null(subset_))

    def explode(self, columns: Sequence[str]) -> Self:
        """Unnest a single list column.

        Raises:
            InvalidOperationError: If any requested column is not a List.
            NotImplementedError: If the number of columns is not exactly one.
        """
        dtypes = self._version.dtypes
        schema = self.collect_schema()
        for col in columns:
            dtype = schema[col]

            if dtype != dtypes.List:
                msg = (
                    f"`explode` operation not supported for dtype `{dtype}`, "
                    "expected List type"
                )
                raise InvalidOperationError(msg)

        if len(columns) != 1:
            msg = (
                "Exploding on multiple columns is not supported with Ibis backend since "
                "we cannot guarantee that the exploded columns have matching element counts."
            )
            raise NotImplementedError(msg)

        return self._with_native(self.native.unnest(columns[0], keep_empty=True))

    def unpivot(
        self,
        on: Sequence[str] | None,
        index: Sequence[str] | None,
        variable_name: str,
        value_name: str,
    ) -> Self:
        """Reshape from wide to long format via `Table.pivot_longer`.

        Arguments:
            on: Columns to unpivot; defaults to every column not in `index`.
            index: Identifier columns to keep; defaults to none.
            variable_name: Name of the output column holding the former
                column names.
            value_name: Name of the output column holding the values.
        """
        import ibis.selectors as s

        index_: Sequence[str] = [] if index is None else index
        on_: Sequence[str] = (
            [c for c in self.columns if c not in index_] if on is None else on
        )

        # Discard columns not in the index
        final_columns = list(dict.fromkeys([*index_, variable_name, value_name]))

        unpivoted = self.native.pivot_longer(
            s.cols(*on_), names_to=variable_name, values_to=value_name
        )
        return self._with_native(unpivoted.select(*final_columns))

    def with_row_index(self, name: str, order_by: Sequence[str]) -> Self:
        """Prepend a row-number column `name`, numbered in `order_by` order."""
        to_select = [
            ibis.row_number().over(ibis.window(order_by=order_by)).name(name),
            ibis.selectors.all(),
        ]
        return self._with_native(self.native.select(*to_select))

    gather_every = not_implemented.deprecated(
        "`LazyFrame.gather_every` is deprecated and will be removed in a future version."
    )
    tail = not_implemented.deprecated(
        "`LazyFrame.tail` is deprecated and will be removed in a future version."
    )

View File

@@ -0,0 +1,648 @@
from __future__ import annotations
import operator
from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast
import ibis
from narwhals._compliant import LazyExpr, WindowInputs
from narwhals._expression_parsing import (
combine_alias_output_names,
combine_evaluate_output_names,
)
from narwhals._ibis.expr_dt import IbisExprDateTimeNamespace
from narwhals._ibis.expr_list import IbisExprListNamespace
from narwhals._ibis.expr_str import IbisExprStringNamespace
from narwhals._ibis.expr_struct import IbisExprStructNamespace
from narwhals._ibis.utils import is_floating, lit, narwhals_to_native_dtype
from narwhals._utils import Implementation, not_implemented
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Sequence
import ibis.expr.types as ir
from typing_extensions import Self
from narwhals._compliant.typing import (
AliasNames,
EvalNames,
EvalSeries,
WindowFunction,
)
from narwhals._expression_parsing import ExprKind, ExprMetadata
from narwhals._ibis.dataframe import IbisLazyFrame
from narwhals._ibis.namespace import IbisNamespace
from narwhals._utils import Version, _LimitedContext
from narwhals.typing import IntoDType, RankMethod, RollingInterpolationMethod
ExprT = TypeVar("ExprT", bound=ir.Value)
IbisWindowFunction = WindowFunction[IbisLazyFrame, ir.Value]
IbisWindowInputs = WindowInputs[ir.Value]
class IbisExpr(LazyExpr["IbisLazyFrame", "ir.Column"]):
    """Lazy expression for the Ibis backend.

    An expression is a callable that, given an `IbisLazyFrame`, returns a
    sequence of native Ibis values. An optional window function describes how
    the expression is evaluated inside `over(...)`.
    """

    _implementation = Implementation.IBIS

    def __init__(
        self,
        call: EvalSeries[IbisLazyFrame, ir.Value],
        window_function: IbisWindowFunction | None = None,
        *,
        evaluate_output_names: EvalNames[IbisLazyFrame],
        alias_output_names: AliasNames | None,
        version: Version,
    ) -> None:
        """Store the evaluation callables and metadata.

        Arguments:
            call: Maps a frame to the native values of this expression.
            window_function: Custom evaluation used by `over`; when omitted,
                `window_function` falls back to a default.
            evaluate_output_names: Callable producing the output names.
            alias_output_names: Optional callable renaming the output names.
            version: Narwhals API version.
        """
        self._call = call
        self._evaluate_output_names = evaluate_output_names
        self._alias_output_names = alias_output_names
        self._version = version
        self._metadata: ExprMetadata | None = None
        self._window_function: IbisWindowFunction | None = window_function

    @property
    def window_function(self) -> IbisWindowFunction:
        """The custom window function, or a default applying `.over(window)`.

        The default window groups by `partition_by` and orders by `order_by`
        with no frame bounds.
        """

        def default_window_func(
            df: IbisLazyFrame, window_inputs: IbisWindowInputs
        ) -> list[ir.Value]:
            return [
                expr.over(
                    ibis.window(
                        group_by=window_inputs.partition_by,
                        order_by=self._sort(*window_inputs.order_by),
                    )
                )
                for expr in self(df)
            ]

        return self._window_function or default_window_func

    def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]:
        """Evaluate the expression against `df`."""
        return self._call(df)

    def __narwhals_expr__(self) -> None: ...

    def __narwhals_namespace__(self) -> IbisNamespace:  # pragma: no cover
        """Return an `IbisNamespace` bound to this expression's version."""
        from narwhals._ibis.namespace import IbisNamespace

        return IbisNamespace(version=self._version)

    def _cum_window_func(
        self, *, reverse: bool, func_name: Literal["sum", "max", "min", "count"]
    ) -> IbisWindowFunction:
        """Build the window function for a cumulative aggregation.

        The frame runs from unbounded preceding to the current row. With
        `reverse=True` the ordering is descending with nulls last.
        """

        def func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            window = ibis.window(
                group_by=list(inputs.partition_by),
                order_by=self._sort(
                    *inputs.order_by, descending=reverse, nulls_last=reverse
                ),
                preceding=None,  # unbounded
                following=0,
            )

            return [getattr(expr, func_name)().over(window) for expr in self(df)]

        return func

    def _rolling_window_func(
        self,
        *,
        func_name: Literal["sum", "mean", "std", "var"],
        center: bool,
        window_size: int,
        min_samples: int,
        ddof: int | None = None,
    ) -> IbisWindowFunction:
        """Build the window function for a rolling aggregation.

        Arguments:
            func_name: Aggregation to apply inside each window.
            center: If true the window is centred on the current row
                (`window_size // 2` rows before, the rest after); otherwise it
                ends at the current row.
            window_size: Total number of rows in the window.
            min_samples: Minimum non-null count in the window; below it the
                result is null.
            ddof: Delta degrees of freedom for "std"/"var"; only 0 and 1 are
                supported.
        """
        supported_funcs = ["sum", "mean", "std", "var"]

        if center:
            preceding = window_size // 2
            following = window_size - preceding - 1
        else:
            preceding = window_size - 1
            following = 0

        def func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            window = ibis.window(
                group_by=list(inputs.partition_by),
                order_by=self._sort(*inputs.order_by),
                preceding=preceding,
                following=following,
            )

            def inner_f(expr: ir.NumericColumn) -> ir.Value:
                if func_name in {"sum", "mean"}:
                    func_ = getattr(expr, func_name)()
                elif func_name == "var" and ddof == 0:
                    func_ = expr.var(how="pop")
                # Equality, not `in "var"`: the latter is a substring test and
                # would also accept names such as "" or "ar".
                elif func_name == "var" and ddof == 1:
                    func_ = expr.var(how="sample")
                elif func_name == "std" and ddof == 0:
                    func_ = expr.std(how="pop")
                elif func_name == "std" and ddof == 1:
                    func_ = expr.std(how="sample")
                elif func_name in {"var", "std"}:  # pragma: no cover
                    msg = f"Only ddof=0 and ddof=1 are currently supported for rolling_{func_name}."
                    raise ValueError(msg)
                else:  # pragma: no cover
                    msg = f"Only the following functions are supported: {supported_funcs}.\nGot: {func_name}."
                    raise ValueError(msg)

                rolling_calc = func_.over(window)
                valid_count = expr.count().over(window)
                # Null out windows holding fewer than `min_samples` non-null values.
                return ibis.cases(
                    (valid_count >= ibis.literal(min_samples), rolling_calc),
                    else_=ibis.null(),
                )

            return [inner_f(cast("ir.NumericColumn", expr)) for expr in self(df)]

        return func

    def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
        """Return `self` unchanged."""
        # Ibis does its own broadcasting.
        return self

    def _sort(
        self, *cols: ir.Column | str, descending: bool = False, nulls_last: bool = False
    ) -> Iterator[ir.Column]:
        """Yield an Ibis sort key for each of `cols`.

        Arguments:
            *cols: Columns (or column names) to order by.
            descending: Use `ibis.desc` instead of `ibis.asc`.
            nulls_last: Place nulls after non-null values.
        """
        mapping = {
            (False, False): partial(ibis.asc, nulls_first=True),
            (False, True): partial(ibis.asc, nulls_first=False),
            (True, False): partial(ibis.desc, nulls_first=True),
            (True, True): partial(ibis.desc, nulls_first=False),
        }
        sort = mapping[(descending, nulls_last)]
        yield from (cast("ir.Column", sort(col)) for col in cols)

    @classmethod
    def from_column_names(
        cls: type[Self],
        evaluate_column_names: EvalNames[IbisLazyFrame],
        /,
        *,
        context: _LimitedContext,
    ) -> Self:
        """Create an expression selecting the columns named by `evaluate_column_names`."""

        def func(df: IbisLazyFrame) -> list[ir.Column]:
            return [df.native[name] for name in evaluate_column_names(df)]

        return cls(
            func,
            evaluate_output_names=evaluate_column_names,
            alias_output_names=None,
            version=context._version,
        )

    @classmethod
    def from_column_indices(cls, *column_indices: int, context: _LimitedContext) -> Self:
        """Create an expression selecting columns by position."""

        def func(df: IbisLazyFrame) -> list[ir.Column]:
            return [df.native[i] for i in column_indices]

        return cls(
            func,
            evaluate_output_names=cls._eval_names_indices(column_indices),
            alias_output_names=None,
            version=context._version,
        )

    @classmethod
    def _from_elementwise_horizontal_op(
        cls, func: Callable[[Iterable[ir.Value]], ir.Value], *exprs: Self
    ) -> Self:
        """Create an expression that reduces the columns of `exprs` with `func`.

        The version is taken from the first expression in `exprs`.
        """

        def call(df: IbisLazyFrame) -> list[ir.Value]:
            cols = (col for _expr in exprs for col in _expr(df))
            return [func(cols)]

        context = exprs[0]
        return cls(
            call=call,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            version=context._version,
        )

    def _with_callable(
        self, call: Callable[..., ir.Value], /, **expressifiable_args: Self | Any
    ) -> Self:
        """Create expression from callable.

        Arguments:
            call: Callable applied to each native value produced by this
                expression; it also receives the evaluated
                `expressifiable_args` as keyword arguments.
            expressifiable_args: arguments pass to expression which should be parsed
                as expressions (e.g. in `nw.col('a').is_between('b', 'c')`).
                Expression arguments are evaluated against the frame; every
                other value is wrapped with `lit`.
        """

        def func(df: IbisLazyFrame) -> list[ir.Value]:
            native_series_list = self(df)
            other_native_series = {
                key: df._evaluate_expr(value) if self._is_expr(value) else lit(value)
                for key, value in expressifiable_args.items()
            }
            return [
                call(native_series, **other_native_series)
                for native_series in native_series_list
            ]

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )

    def _with_binary(self, op: Callable[..., ir.Value], other: Self | Any) -> Self:
        """Apply the binary operator `op` with `other` as right-hand operand."""
        return self._with_callable(op, other=other)

    def _with_alias_output_names(self, func: AliasNames | None, /) -> Self:
        """Return a copy using `func` to alias output names."""
        return self.__class__(
            self._call,
            self._window_function,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=func,
            version=self._version,
        )

    def _with_window_function(self, window_function: IbisWindowFunction) -> Self:
        """Return a copy using `window_function` for `over`."""
        return self.__class__(
            self._call,
            window_function,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )

    @classmethod
    def _alias_native(cls, expr: ExprT, name: str, /) -> ExprT:
        """Return `expr` renamed to `name`."""
        return cast("ExprT", expr.name(name))

    def __invert__(self) -> Self:
        """Apply `operator.invert` to each value."""
        invert = cast("Callable[..., ir.Value]", operator.invert)
        return self._with_callable(invert)

    def abs(self) -> Self:
        """Absolute value."""
        return self._with_callable(lambda expr: expr.abs())

    def mean(self) -> Self:
        """Mean."""
        return self._with_callable(lambda expr: expr.mean())

    def median(self) -> Self:
        """Median."""
        return self._with_callable(lambda expr: expr.median())

    def all(self) -> Self:
        """Logical AND reduction; a null result is replaced by `True`."""
        return self._with_callable(lambda expr: expr.all().fill_null(lit(True)))  # noqa: FBT003

    def any(self) -> Self:
        """Logical OR reduction; a null result is replaced by `False`."""
        return self._with_callable(lambda expr: expr.any().fill_null(lit(False)))  # noqa: FBT003

    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> Self:
        """Quantile of the values.

        Raises:
            NotImplementedError: If `interpolation` is not "linear".
        """
        if interpolation != "linear":
            msg = "Only linear interpolation methods are supported for Ibis quantile."
            raise NotImplementedError(msg)
        return self._with_callable(lambda expr: expr.quantile(quantile))

    def clip(self, lower_bound: Any, upper_bound: Any) -> Self:
        """Clip values to `[lower_bound, upper_bound]`; a `None` bound is omitted."""

        def _clip(
            expr: ir.NumericValue, lower: Any | None = None, upper: Any | None = None
        ) -> ir.NumericValue:
            return expr.clip(lower=lower, upper=upper)

        # Only forward the bounds that were given, so that a missing bound is
        # not turned into a literal by `_with_callable`.
        if lower_bound is None:
            return self._with_callable(_clip, upper=upper_bound)
        if upper_bound is None:
            return self._with_callable(_clip, lower=lower_bound)
        return self._with_callable(_clip, lower=lower_bound, upper=upper_bound)

    def sum(self) -> Self:
        """Sum; a null result is replaced by 0."""
        return self._with_callable(lambda expr: expr.sum().fill_null(lit(0)))

    def n_unique(self) -> Self:
        """Number of distinct values, plus one if any value is null."""
        return self._with_callable(
            lambda expr: expr.nunique() + expr.isnull().any().cast("int8")
        )

    def count(self) -> Self:
        """Result of the native `count()`."""
        return self._with_callable(lambda expr: expr.count())

    def len(self) -> Self:
        """Row count of the frame (`df.native.count()`)."""

        def func(df: IbisLazyFrame) -> list[ir.IntegerScalar]:
            return [df.native.count()]

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )

    def std(self, ddof: int) -> Self:
        """Standard deviation with `ddof` delta degrees of freedom."""

        def _std(expr: ir.NumericColumn, ddof: int) -> ir.Value:
            if ddof == 0:
                return expr.std(how="pop")
            elif ddof == 1:
                return expr.std(how="sample")
            else:
                # Rescale the population value: std_pop * sqrt(n) / sqrt(n - ddof).
                n_samples = expr.count()
                std_pop = expr.std(how="pop")
                ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof))
                return std_pop * n_samples.sqrt() / (n_samples - ddof_lit).sqrt()

        return self._with_callable(lambda expr: _std(expr, ddof))

    def var(self, ddof: int) -> Self:
        """Variance with `ddof` delta degrees of freedom."""

        def _var(expr: ir.NumericColumn, ddof: int) -> ir.Value:
            if ddof == 0:
                return expr.var(how="pop")
            elif ddof == 1:
                return expr.var(how="sample")
            else:
                # Rescale the population value: var_pop * n / (n - ddof).
                n_samples = expr.count()
                var_pop = expr.var(how="pop")
                ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof))
                return var_pop * n_samples / (n_samples - ddof_lit)

        return self._with_callable(lambda expr: _var(expr, ddof))

    def max(self) -> Self:
        """Maximum."""
        return self._with_callable(lambda expr: expr.max())

    def min(self) -> Self:
        """Minimum."""
        return self._with_callable(lambda expr: expr.min())

    def null_count(self) -> Self:
        """Number of null values."""
        return self._with_callable(lambda expr: expr.isnull().sum())

    def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self:
        """Evaluate the expression through `window_function` for the given window."""

        def func(df: IbisLazyFrame) -> Sequence[ir.Value]:
            return self.window_function(df, WindowInputs(partition_by, order_by))

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )

    def is_null(self) -> Self:
        """Whether each value is null."""
        return self._with_callable(lambda expr: expr.isnull())

    def is_nan(self) -> Self:
        """Whether each value is NaN.

        Nulls stay null; for non-floating types the non-null result is `False`.
        """

        def func(expr: ir.FloatingValue | Any) -> ir.Value:
            otherwise = expr.isnan() if is_floating(expr.type()) else False
            return ibis.ifelse(expr.isnull(), None, otherwise)

        return self._with_callable(func)

    def is_finite(self) -> Self:
        """Whether each value is neither infinite nor NaN."""
        return self._with_callable(lambda expr: ~(expr.isinf() | expr.isnan()))

    def is_in(self, other: Sequence[Any]) -> Self:
        """Whether each value is contained in `other`."""
        return self._with_callable(lambda expr: expr.isin(other))

    def round(self, decimals: int) -> Self:
        """Round to `decimals` digits."""
        return self._with_callable(lambda expr: expr.round(decimals))

    def shift(self, n: int) -> Self:
        """Shift values by `n` rows using `lag(n)` over the window."""

        def _func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            return [
                expr.lag(n).over(  # type: ignore[attr-defined, unused-ignore]
                    ibis.window(
                        group_by=inputs.partition_by,
                        order_by=self._sort(*inputs.order_by),
                    )
                )
                for expr in self(df)
            ]

        return self._with_window_function(_func)

    def is_first_distinct(self) -> Self:
        """Whether each row is the first occurrence of its value in window order."""

        def func(
            df: IbisLazyFrame, inputs: IbisWindowInputs
        ) -> Sequence[ir.BooleanValue]:
            # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1`
            return [
                ibis.row_number().over(
                    ibis.window(
                        group_by=[*inputs.partition_by, expr],
                        order_by=self._sort(*inputs.order_by),
                    )
                )
                == lit(0)
                for expr in self(df)
            ]

        return self._with_window_function(func)

    def is_last_distinct(self) -> Self:
        """Whether each row is the last occurrence of its value in window order."""

        def func(
            df: IbisLazyFrame, inputs: IbisWindowInputs
        ) -> Sequence[ir.BooleanValue]:
            # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1`
            # The ordering is reversed so that the last occurrence gets number 0.
            return [
                ibis.row_number().over(
                    ibis.window(
                        group_by=[*inputs.partition_by, expr],
                        order_by=self._sort(
                            *inputs.order_by, descending=True, nulls_last=True
                        ),
                    )
                )
                == lit(0)
                for expr in self(df)
            ]

        return self._with_window_function(func)

    def diff(self) -> Self:
        """Difference between each value and the previous one in window order."""

        def _func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            return [
                expr
                - expr.lag().over(  # type: ignore[attr-defined, unused-ignore]
                    ibis.window(
                        following=0,
                        group_by=inputs.partition_by,
                        order_by=self._sort(*inputs.order_by),
                    )
                )
                for expr in self(df)
            ]

        return self._with_window_function(_func)

    def cum_sum(self, *, reverse: bool) -> Self:
        """Cumulative sum."""
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="sum")
        )

    def cum_max(self, *, reverse: bool) -> Self:
        """Cumulative maximum."""
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="max")
        )

    def cum_min(self, *, reverse: bool) -> Self:
        """Cumulative minimum."""
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="min")
        )

    def cum_count(self, *, reverse: bool) -> Self:
        """Cumulative count."""
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="count")
        )

    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        """Rolling sum over `window_size` rows."""
        return self._with_window_function(
            self._rolling_window_func(
                func_name="sum",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
            )
        )

    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        """Rolling mean over `window_size` rows."""
        return self._with_window_function(
            self._rolling_window_func(
                func_name="mean",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
            )
        )

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        """Rolling variance over `window_size` rows."""
        return self._with_window_function(
            self._rolling_window_func(
                func_name="var",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
                ddof=ddof,
            )
        )

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        """Rolling standard deviation over `window_size` rows."""
        return self._with_window_function(
            self._rolling_window_func(
                func_name="std",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
                ddof=ddof,
            )
        )

    def fill_null(self, value: Self | Any, strategy: Any, limit: int | None) -> Self:
        """Replace nulls with `value`.

        Raises:
            NotImplementedError: If `strategy` or `limit` is given.
        """
        # Ibis doesn't yet allow ignoring nulls in first/last with window functions, which makes forward/backward
        # strategies inconsistent when there are nulls present: https://github.com/ibis-project/ibis/issues/9539
        if strategy is not None:
            msg = "`strategy` is not supported for the Ibis backend"
            raise NotImplementedError(msg)
        if limit is not None:
            msg = "`limit` is not supported for the Ibis backend"  # pragma: no cover
            raise NotImplementedError(msg)

        def _fill_null(expr: ir.Value, value: ir.Scalar) -> ir.Value:
            return expr.fill_null(value)

        return self._with_callable(_fill_null, value=value)

    def cast(self, dtype: IntoDType) -> Self:
        """Cast to the Ibis dtype corresponding to the narwhals `dtype`."""

        def _func(expr: ir.Column) -> ir.Value:
            native_dtype = narwhals_to_native_dtype(dtype, self._version)
            # ibis `cast` overloads do not include DataType, only literals
            return expr.cast(native_dtype)  # type: ignore[unused-ignore]

        return self._with_callable(_func)

    def is_unique(self) -> Self:
        """Whether each value occurs exactly once."""
        # `isnull().count()` counts the rows of each value group, nulls included.
        return self._with_callable(
            lambda expr: expr.isnull().count().over(ibis.window(group_by=(expr))) == 1
        )

    def rank(self, method: RankMethod, *, descending: bool) -> Self:
        """Rank the values; null inputs produce null ranks.

        Arguments:
            method: "dense" and "ordinal" use `dense_rank` / `row_number`; every
                other method starts from `rank()`, with an extra adjustment for
                "max" and "average".
            descending: Rank in descending order.
        """

        def _rank(expr: ir.Column) -> ir.Column:
            order_by = next(self._sort(expr, descending=descending, nulls_last=True))
            window = ibis.window(order_by=order_by)

            if method == "dense":
                rank_ = order_by.dense_rank()
            elif method == "ordinal":
                rank_ = cast("ir.IntegerColumn", ibis.row_number().over(window))
            else:
                rank_ = order_by.rank()

            # Ibis uses 0-based ranking. Add 1 to match polars 1-based rank.
            rank_ = rank_ + cast("ir.IntegerValue", lit(1))

            # For "max" and "average", adjust using the count of rows in the partition.
            if method == "max":
                # Define a window partitioned by expr (i.e. each distinct value)
                partition = ibis.window(group_by=[expr])
                cnt = cast("ir.IntegerValue", expr.count().over(partition))
                rank_ = rank_ + cnt - cast("ir.IntegerValue", lit(1))
            elif method == "average":
                partition = ibis.window(group_by=[expr])
                cnt = cast("ir.IntegerValue", expr.count().over(partition))
                avg = cast(
                    "ir.NumericValue", (cnt - cast("ir.IntegerScalar", lit(1))) / lit(2.0)
                )
                rank_ = rank_ + avg

            # No `else_`: rows where `expr` is null get a null rank.
            return cast("ir.Column", ibis.cases((expr.notnull(), rank_)))

        return self._with_callable(_rank)

    def log(self, base: float) -> Self:
        """Logarithm in `base`; NaN for negative input and -inf for zero."""

        def _log(expr: ir.NumericColumn) -> ir.Value:
            otherwise = expr.log(cast("ir.NumericValue", lit(base)))
            return ibis.cases(
                (expr < lit(0), lit(float("nan"))),
                (expr == lit(0), lit(float("-inf"))),
                else_=otherwise,
            )

        return self._with_callable(_log)

    def exp(self) -> Self:
        """Exponential."""

        def _exp(expr: ir.NumericColumn) -> ir.Value:
            return expr.exp()

        return self._with_callable(_exp)

    def sqrt(self) -> Self:
        """Square root; NaN for negative input."""

        def _sqrt(expr: ir.NumericColumn) -> ir.Value:
            return ibis.cases((expr < lit(0), lit(float("nan"))), else_=expr.sqrt())

        return self._with_callable(_sqrt)

    @property
    def str(self) -> IbisExprStringNamespace:
        """String operations namespace."""
        return IbisExprStringNamespace(self)

    @property
    def dt(self) -> IbisExprDateTimeNamespace:
        """Datetime operations namespace."""
        return IbisExprDateTimeNamespace(self)

    @property
    def list(self) -> IbisExprListNamespace:
        """List operations namespace."""
        return IbisExprListNamespace(self)

    @property
    def struct(self) -> IbisExprStructNamespace:
        """Struct operations namespace."""
        return IbisExprStructNamespace(self)

    # NOTE: https://github.com/ibis-project/ibis/issues/10542
    cum_prod = not_implemented()
    drop_nulls = not_implemented()

    # NOTE: https://github.com/ibis-project/ibis/issues/11176
    skew = not_implemented()
    kurtosis = not_implemented()

    unique = not_implemented()

View File

@@ -0,0 +1,111 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Callable
from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import DateTimeNamespace
from narwhals._duration import Interval
from narwhals._ibis.utils import (
UNITS_DICT_BUCKET,
UNITS_DICT_TRUNCATE,
timedelta_to_ibis_interval,
)
from narwhals._utils import not_implemented
if TYPE_CHECKING:
import ibis.expr.types as ir
from narwhals._ibis.expr import IbisExpr
from narwhals._ibis.utils import BucketUnit, TruncateUnit
class IbisExprDateTimeNamespace(
LazyExprNamespace["IbisExpr"], DateTimeNamespace["IbisExpr"]
):
def year(self) -> IbisExpr:
    """Return an expression calling `.year()` on each native value."""

    def _year(native):
        return native.year()

    return self.compliant._with_callable(_year)
def month(self) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.month())
def day(self) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.day())
def hour(self) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.hour())
def minute(self) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.minute())
def second(self) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.second())
def millisecond(self) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.millisecond())
def microsecond(self) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.microsecond())
def to_string(self, format: str) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.strftime(format))
def weekday(self) -> IbisExpr:
# Ibis uses 0-6 for Monday-Sunday. Add 1 to match polars.
return self.compliant._with_callable(lambda expr: expr.day_of_week.index() + 1)
def ordinal_day(self) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.day_of_year())
def date(self) -> IbisExpr:
return self.compliant._with_callable(lambda expr: expr.date())
def _bucket(self, kwds: dict[BucketUnit, Any], /) -> Callable[..., ir.TimestampValue]:
def fn(expr: ir.TimestampValue) -> ir.TimestampValue:
return expr.bucket(**kwds)
return fn
def _truncate(self, unit: TruncateUnit, /) -> Callable[..., ir.TimestampValue]:
def fn(expr: ir.TimestampValue) -> ir.TimestampValue:
return expr.truncate(unit)
return fn
def truncate(self, every: str) -> IbisExpr:
interval = Interval.parse(every)
multiple, unit = interval.multiple, interval.unit
if unit == "q":
multiple, unit = 3 * multiple, "mo"
if multiple != 1:
if self.compliant._backend_version < (7, 1): # pragma: no cover
msg = "Truncating datetimes with multiples of the unit is only supported in Ibis >= 7.1."
raise NotImplementedError(msg)
fn = self._bucket({UNITS_DICT_BUCKET[unit]: multiple})
else:
fn = self._truncate(UNITS_DICT_TRUNCATE[unit])
return self.compliant._with_callable(fn)
def offset_by(self, every: str) -> IbisExpr:
interval = Interval.parse_no_constraints(every)
unit = interval.unit
if unit in {"y", "q", "mo", "d", "ns"}:
msg = f"Offsetting by {unit} is not yet supported for ibis."
raise NotImplementedError(msg)
offset = timedelta_to_ibis_interval(interval.to_timedelta())
return self.compliant._with_callable(lambda expr: expr.add(offset))
def replace_time_zone(self, time_zone: str | None) -> IbisExpr:
if time_zone is None:
return self.compliant._with_callable(lambda expr: expr.cast("timestamp"))
else: # pragma: no cover
msg = "`replace_time_zone` with non-null `time_zone` not yet implemented for Ibis"
raise NotImplementedError(msg)
nanosecond = not_implemented()
total_minutes = not_implemented()
total_seconds = not_implemented()
total_milliseconds = not_implemented()
total_microseconds = not_implemented()
total_nanoseconds = not_implemented()
convert_time_zone = not_implemented()
timestamp = not_implemented()

View File

@@ -0,0 +1,14 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import ListNamespace
if TYPE_CHECKING:
from narwhals._ibis.expr import IbisExpr
class IbisExprListNamespace(LazyExprNamespace["IbisExpr"], ListNamespace["IbisExpr"]):
    """List operations for `IbisExpr`."""

    def len(self) -> IbisExpr:
        """Return the result of the native `length()` method for each value."""

        def _length(expr):  # type: ignore[no-untyped-def]
            return expr.length()

        return self.compliant._with_callable(_length)

View File

@@ -0,0 +1,132 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Callable, cast
import ibis
import ibis.expr.types as ir
from ibis.expr.datatypes import Timestamp
from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import StringNamespace
from narwhals._ibis.utils import lit
from narwhals._utils import _is_naive_format, not_implemented
if TYPE_CHECKING:
from narwhals._ibis.expr import IbisExpr
class IbisExprStringNamespace(LazyExprNamespace["IbisExpr"], StringNamespace["IbisExpr"]):
    """String operations for `IbisExpr`.

    Every method wraps a call on the native Ibis string value in a callable
    and hands it to `self.compliant._with_callable`.
    """

    def starts_with(self, prefix: str) -> IbisExpr:
        """Test each value with the native `startswith(prefix)`."""
        return self.compliant._with_callable(lambda expr: expr.startswith(prefix))

    def ends_with(self, suffix: str) -> IbisExpr:
        """Test each value with the native `endswith(suffix)`."""
        return self.compliant._with_callable(lambda expr: expr.endswith(suffix))

    def contains(self, pattern: str, *, literal: bool) -> IbisExpr:
        """Test each value for `pattern`.

        Uses the native `contains` when `literal` is true and the native
        `re_search` otherwise.
        """
        if literal:
            return self.compliant._with_callable(lambda expr: expr.contains(pattern))
        return self.compliant._with_callable(lambda expr: expr.re_search(pattern))

    def slice(self, offset: int, length: int | None) -> IbisExpr:
        """Take a substring via the native `substr(start=offset, length=length)`."""
        return self.compliant._with_callable(
            lambda expr: expr.substr(start=offset, length=length)
        )

    def split(self, by: str) -> IbisExpr:
        """Split each value with the native `split(by)`."""
        return self.compliant._with_callable(lambda expr: expr.split(by))

    def len_chars(self) -> IbisExpr:
        """Return the result of the native `length()` for each value."""

        def _length(expr: ir.StringColumn) -> ir.Value:
            return expr.length()

        return self.compliant._with_callable(_length)

    def to_lowercase(self) -> IbisExpr:
        """Apply the native `lower()` to each value."""

        def _lower(expr: ir.StringColumn) -> ir.Value:
            return expr.lower()

        return self.compliant._with_callable(_lower)

    def to_uppercase(self) -> IbisExpr:
        """Apply the native `upper()` to each value."""

        def _upper(expr: ir.StringColumn) -> ir.Value:
            return expr.upper()

        return self.compliant._with_callable(_upper)

    def strip_chars(self, characters: str | None) -> IbisExpr:
        """Apply the native `strip()` to each value.

        Raises:
            NotImplementedError: `characters` is not None.
        """
        if characters is not None:
            msg = "Ibis does not support `characters` argument in `str.strip_chars`"
            raise NotImplementedError(msg)

        def _strip(expr: ir.StringColumn) -> ir.Value:
            return expr.strip()

        return self.compliant._with_callable(_strip)

    def _replace_all(self, pattern: str, value: str) -> Callable[..., ir.StringValue]:
        """Return a callable that applies `expr.re_replace(pattern, value)`."""
        return lambda expr: expr.re_replace(pattern, value)

    def _replace_all_literal(
        self, pattern: str, value: str
    ) -> Callable[..., ir.StringValue]:
        """Return a callable that applies `expr.replace(pattern, value)`."""
        return lambda expr: expr.replace(pattern, value)  # pyright: ignore[reportArgumentType]

    def replace_all(self, pattern: str, value: str, *, literal: bool) -> IbisExpr:
        """Replace `pattern` with `value`, literally or via `re_replace`."""
        if literal:
            replacer = self._replace_all_literal(pattern, value)
        else:
            replacer = self._replace_all(pattern, value)
        return self.compliant._with_callable(replacer)

    def _to_datetime(self, format: str) -> Callable[..., ir.TimestampValue]:
        """Return a callable that applies `expr.as_timestamp(format)`."""
        return lambda expr: expr.as_timestamp(format)

    def _to_datetime_naive(self, format: str) -> Callable[..., ir.TimestampValue]:
        """Return a callable that parses, then casts to `Timestamp(timezone=None)`."""

        def _parse_naive(expr: ir.StringColumn) -> ir.TimestampValue:
            naive: Any = Timestamp(timezone=None)
            parsed = expr.as_timestamp(format)
            return parsed.cast(naive)

        return _parse_naive

    def to_datetime(self, format: str | None) -> IbisExpr:
        """Parse each value as a timestamp using `format`.

        When `_is_naive_format(format)` is true the parsed value is also
        cast to a timestamp without a timezone.

        Raises:
            NotImplementedError: `format` is None.
        """
        if format is None:
            msg = "Cannot infer format with Ibis backend"
            raise NotImplementedError(msg)
        if _is_naive_format(format):
            parser = self._to_datetime_naive(format)
        else:
            parser = self._to_datetime(format)
        return self.compliant._with_callable(parser)

    def to_date(self, format: str | None) -> IbisExpr:
        """Parse each value with the native `as_date(format)`.

        Raises:
            NotImplementedError: `format` is None.
        """
        if format is None:
            msg = "Cannot infer format with Ibis backend"
            raise NotImplementedError(msg)
        return self.compliant._with_callable(lambda expr: expr.as_date(format))

    def zfill(self, width: int) -> IbisExpr:
        """Left-pad values shorter than `width` with "0".

        For a short value starting with "-" or "+", the remainder after the
        first character is zero-padded to `width - 1` and then padded back
        to `width` with that sign character. Values that are not shorter
        than `width` are returned unchanged.
        """

        def func(expr: ir.StringColumn) -> ir.Value:
            n_chars = expr.length()
            too_short = n_chars < lit(width)
            one = cast("ir.IntegerScalar", lit(1))
            tail_length = cast("ir.IntegerValue", n_chars - one)
            # Everything after the first character, zero-padded to leave
            # exactly one slot for the sign.
            padded_tail = expr.substr(one, tail_length).lpad(width - 1, "0")
            signed_branches = [
                (expr.startswith(sign) & too_short, padded_tail.lpad(width, sign))
                for sign in ("-", "+")
            ]
            return ibis.cases(
                *signed_branches,
                (too_short, expr.lpad(width, "0")),
                else_=expr,
            )

        return self.compliant._with_callable(func)

    replace = not_implemented()

View File

@@ -0,0 +1,19 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from narwhals._compliant import LazyExprNamespace
from narwhals._compliant.any_namespace import StructNamespace
if TYPE_CHECKING:
import ibis.expr.types as ir
from narwhals._ibis.expr import IbisExpr
class IbisExprStructNamespace(LazyExprNamespace["IbisExpr"], StructNamespace["IbisExpr"]):
    """Struct operations for `IbisExpr`."""

    def field(self, name: str) -> IbisExpr:
        """Index each struct value by `name` and alias the result to `name`."""

        def _get_field(expr: ir.StructColumn) -> ir.Column:
            return expr[name]

        extracted = self.compliant._with_callable(_get_field)
        return extracted.alias(name)

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from narwhals._compliant import LazyGroupBy
if TYPE_CHECKING:
from collections.abc import Sequence
import ibis.expr.types as ir # noqa: F401
from narwhals._ibis.dataframe import IbisLazyFrame
from narwhals._ibis.expr import IbisExpr
class IbisGroupBy(LazyGroupBy["IbisLazyFrame", "IbisExpr", "ir.Value"]):
    """Group-by for `IbisLazyFrame`, aggregating through the native table."""

    def __init__(
        self,
        df: IbisLazyFrame,
        keys: Sequence[str] | Sequence[IbisExpr],
        /,
        *,
        drop_null_keys: bool,
    ) -> None:
        """Parse `keys` and store the frame to aggregate.

        When `drop_null_keys` is true, `drop_nulls` is applied to the parsed
        frame over the parsed keys before it is stored.
        """
        parsed_frame, self._keys, self._output_key_names = self._parse_keys(
            df, keys=keys
        )
        if drop_null_keys:
            parsed_frame = parsed_frame.drop_nulls(self._keys)
        self._compliant_frame = parsed_frame

    def agg(self, *exprs: IbisExpr) -> IbisLazyFrame:
        """Aggregate `exprs` per group and rename keys to their output names."""
        grouped = self.compliant.native.group_by(self._keys)
        aggregated = grouped.aggregate(*self._evaluate_exprs(exprs))
        key_renames = dict(zip(self._keys, self._output_key_names))
        return self.compliant._with_native(aggregated).rename(key_renames)

View File

@@ -0,0 +1,191 @@
from __future__ import annotations
import operator
from functools import reduce
from itertools import chain
from typing import TYPE_CHECKING, Any, cast
import ibis
import ibis.expr.types as ir
from narwhals._compliant import LazyNamespace, LazyThen, LazyWhen
from narwhals._expression_parsing import (
combine_alias_output_names,
combine_evaluate_output_names,
)
from narwhals._ibis.dataframe import IbisLazyFrame
from narwhals._ibis.expr import IbisExpr
from narwhals._ibis.selectors import IbisSelectorNamespace
from narwhals._ibis.utils import lit, narwhals_to_native_dtype
from narwhals._utils import Implementation, requires
if TYPE_CHECKING:
from collections.abc import Iterable, Sequence
from narwhals._utils import Version
from narwhals.typing import ConcatMethod, IntoDType
class IbisNamespace(LazyNamespace[IbisLazyFrame, IbisExpr, "ir.Table"]):
    """Namespace of frame- and expression-level constructors for the Ibis backend."""

    _implementation: Implementation = Implementation.IBIS

    def __init__(self, *, version: Version) -> None:
        """Store the Narwhals `version` used when building expressions."""
        self._version = version

    @property
    def selectors(self) -> IbisSelectorNamespace:
        """Return an `IbisSelectorNamespace` built from this namespace."""
        return IbisSelectorNamespace.from_namespace(self)

    @property
    def _expr(self) -> type[IbisExpr]:
        """Return the expression class, `IbisExpr`."""
        return IbisExpr

    @property
    def _lazyframe(self) -> type[IbisLazyFrame]:
        """Return the lazy-frame class, `IbisLazyFrame`."""
        return IbisLazyFrame

    def concat(
        self, items: Iterable[IbisLazyFrame], *, how: ConcatMethod
    ) -> IbisLazyFrame:
        """Union `items` into a single frame via `ibis.union`.

        Raises:
            NotImplementedError: `how` is "diagonal".
            TypeError: the frames do not all share the first frame's schema.
        """
        if how == "diagonal":
            msg = "diagonal concat not supported for Ibis. Please join instead."
            raise NotImplementedError(msg)

        frames = list(items)
        expected_schema = frames[0].schema
        for frame in frames[1:]:
            if frame.schema != expected_schema:
                msg = "inputs should all have the same schema"
                raise TypeError(msg)

        tables = [frame.native for frame in frames]
        return self._lazyframe.from_native(ibis.union(*tables), context=self)

    def concat_str(
        self, *exprs: IbisExpr, separator: str, ignore_nulls: bool
    ) -> IbisExpr:
        """Cast all evaluated columns to string and join them with `separator`.

        With `ignore_nulls` true the separator literal's `join` is used;
        otherwise the columns are chained with `+`.
        """

        def func(df: IbisLazyFrame) -> list[ir.Value]:
            evaluated = [col for expr in exprs for col in expr(df)]
            as_strings = [col.cast("string") for col in evaluated]
            if ignore_nulls:
                sep_literal = cast("ir.StringValue", lit(separator))
                return [sep_literal.join(as_strings)]
            joined = reduce(
                lambda acc, nxt: acc + separator + nxt, as_strings[1:], as_strings[0]
            )
            return [joined]

        return self._expr(
            call=func,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            version=self._version,
        )

    def all_horizontal(self, *exprs: IbisExpr, ignore_nulls: bool) -> IbisExpr:
        """AND the columns together; nulls become True when `ignore_nulls`."""

        def func(cols: Iterable[ir.Value]) -> ir.Value:
            if ignore_nulls:
                cols = [col.fill_null(lit(True)) for col in cols]  # noqa: FBT003
            return reduce(operator.and_, cols)

        return self._expr._from_elementwise_horizontal_op(func, *exprs)

    def any_horizontal(self, *exprs: IbisExpr, ignore_nulls: bool) -> IbisExpr:
        """OR the columns together; nulls become False when `ignore_nulls`."""

        def func(cols: Iterable[ir.Value]) -> ir.Value:
            if ignore_nulls:
                cols = [col.fill_null(lit(False)) for col in cols]  # noqa: FBT003
            return reduce(operator.or_, cols)

        return self._expr._from_elementwise_horizontal_op(func, *exprs)

    def max_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
        """Combine the columns with `ibis.greatest`."""
        return self._expr._from_elementwise_horizontal_op(
            lambda cols: ibis.greatest(*cols), *exprs
        )

    def min_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
        """Combine the columns with `ibis.least`."""
        return self._expr._from_elementwise_horizontal_op(
            lambda cols: ibis.least(*cols), *exprs
        )

    def sum_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
        """Add the columns together after filling nulls with 0."""

        def func(cols: Iterable[ir.Value]) -> ir.Value:
            filled = [col.fill_null(lit(0)) for col in cols]
            return reduce(operator.add, filled)

        return self._expr._from_elementwise_horizontal_op(func, *exprs)

    def mean_horizontal(self, *exprs: IbisExpr) -> IbisExpr:
        """Divide the null-as-zero sum by the per-row count of non-null columns."""

        def func(cols: Iterable[ir.Value]) -> ir.Value:
            columns = list(cols)
            total = reduce(operator.add, [col.fill_null(lit(0)) for col in columns])
            non_null_count = reduce(
                operator.add,
                [col.isnull().ifelse(lit(0), lit(1)) for col in columns],
            )
            return total / non_null_count

        return self._expr._from_elementwise_horizontal_op(func, *exprs)

    @requires.backend_version((10, 0))
    def when(self, predicate: IbisExpr) -> IbisWhen:
        """Start a when/then chain from `predicate` via `IbisWhen.from_expr`."""
        return IbisWhen.from_expr(predicate, context=self)

    def lit(self, value: Any, dtype: IntoDType | None) -> IbisExpr:
        """Build a literal expression named "literal".

        A truthy `dtype` is converted with `narwhals_to_native_dtype` and
        passed to the native literal constructor; otherwise None is passed.
        """

        def func(_df: IbisLazyFrame) -> list[ir.Value]:
            if dtype:
                return [lit(value, narwhals_to_native_dtype(dtype, self._version))]
            return [lit(value, None)]

        return self._expr(
            func,
            evaluate_output_names=lambda _df: ["literal"],
            alias_output_names=None,
            version=self._version,
        )

    def len(self) -> IbisExpr:
        """Build an expression named "len" from the native table's `count()`."""

        def func(_df: IbisLazyFrame) -> list[ir.Value]:
            row_count = _df.native.count()
            return [row_count]

        return self._expr(
            call=func,
            evaluate_output_names=lambda _df: ["len"],
            alias_output_names=None,
            version=self._version,
        )

    def coalesce(self, *exprs: IbisExpr) -> IbisExpr:
        """Combine the columns with `ibis.coalesce`."""
        return self._expr._from_elementwise_horizontal_op(
            lambda cols: ibis.coalesce(*cols), *exprs
        )
class IbisWhen(LazyWhen["IbisLazyFrame", "ir.Value", IbisExpr]):
    """When/then/otherwise evaluation built on `ibis.cases`."""

    lit = lit

    @property
    def _then(self) -> type[IbisThen]:
        """Return the class used for the `then` stage, `IbisThen`."""
        return IbisThen

    def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]:
        """Evaluate the condition and branches against `df`.

        Branch values that are expressions are evaluated on `df`; anything
        else is wrapped as a literal. With no otherwise value, `ibis.cases`
        is called without `else_`.
        """
        is_expr = self._condition._is_expr

        def _as_native(value: Any) -> ir.Value:
            return df._evaluate_expr(value) if is_expr(value) else lit(value)

        branch = (df._evaluate_expr(self._condition), _as_native(self._then_value))
        fallback = self._otherwise_value
        if fallback is None:
            return [ibis.cases(branch)]
        return [ibis.cases(branch, else_=_as_native(fallback))]
# Combines `LazyThen` with `IbisExpr`; the class body adds no members of its own.
class IbisThen(LazyThen["IbisLazyFrame", "ir.Value", IbisExpr], IbisExpr): ...

View File

@@ -0,0 +1,29 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from narwhals._compliant import CompliantSelector, LazySelectorNamespace
from narwhals._ibis.expr import IbisExpr
if TYPE_CHECKING:
import ibis.expr.types as ir # noqa: F401
from narwhals._ibis.dataframe import IbisLazyFrame # noqa: F401
class IbisSelectorNamespace(LazySelectorNamespace["IbisLazyFrame", "ir.Value"]):
    """Selector namespace for the Ibis backend."""

    @property
    def _selector(self) -> type[IbisSelector]:
        """Return the selector class, `IbisSelector`."""
        return IbisSelector
class IbisSelector(  # type: ignore[misc]
    CompliantSelector["IbisLazyFrame", "ir.Value"], IbisExpr
):
    """Selector that is also an `IbisExpr`."""

    def _to_expr(self) -> IbisExpr:
        """Build a plain `IbisExpr` carrying this selector's call and naming hooks."""
        names_fn = self._evaluate_output_names
        alias_fn = self._alias_output_names
        return IbisExpr(
            self._call,
            evaluate_output_names=names_fn,
            alias_output_names=alias_fn,
            version=self._version,
        )

View File

@@ -0,0 +1,41 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, NoReturn
from narwhals._ibis.utils import native_to_narwhals_dtype
from narwhals.dependencies import get_ibis
if TYPE_CHECKING:
from types import ModuleType
from typing_extensions import Self
from narwhals._utils import Version
from narwhals.dtypes import DType
class IbisInterchangeSeries:
    """Minimal series wrapper that exposes only `dtype`.

    Any attribute that is not found through normal lookup raises
    `NotImplementedError` from `__getattr__`.
    """

    def __init__(self, df: Any, version: Version) -> None:
        """Store the native object and the Narwhals version."""
        self._version = version
        self._native_series = df

    def __narwhals_series__(self) -> Self:
        """Return this object itself."""
        return self

    def __native_namespace__(self) -> ModuleType:
        """Return the result of `get_ibis()`."""
        return get_ibis()

    @property
    def dtype(self) -> DType:
        """Convert the first type of the native object's schema to a Narwhals dtype."""
        first_type = self._native_series.schema().types[0]
        return native_to_narwhals_dtype(first_type, self._version)

    def __getattr__(self, attr: str) -> NoReturn:
        """Raise `NotImplementedError` naming the unsupported attribute."""
        parts = [
            f"Attribute {attr} is not supported for interchange-level dataframes.\n\n",
            "If you would like to see this kind of object better supported in ",
            "Narwhals, please open a feature request ",
            "at https://github.com/narwhals-dev/narwhals/issues.",
        ]
        raise NotImplementedError("".join(parts))

View File

@@ -0,0 +1,240 @@
from __future__ import annotations
from functools import lru_cache
from typing import TYPE_CHECKING, Any, Literal, cast
import ibis
import ibis.expr.datatypes as ibis_dtypes
from narwhals._utils import isinstance_or_issubclass
if TYPE_CHECKING:
from collections.abc import Mapping
from datetime import timedelta
import ibis.expr.types as ir
from ibis.common.temporal import TimestampUnit
from ibis.expr.datatypes import DataType as IbisDataType
from typing_extensions import TypeAlias, TypeIs
from narwhals._duration import IntervalUnit
from narwhals._ibis.dataframe import IbisLazyFrame
from narwhals._ibis.expr import IbisExpr
from narwhals._utils import Version
from narwhals.dtypes import DType
from narwhals.typing import IntoDType
lit = ibis.literal
"""Alias for `ibis.literal`."""
# Plural unit names; these are the value type of `UNITS_DICT_BUCKET`.
BucketUnit: TypeAlias = Literal[
    "years",
    "quarters",
    "months",
    "days",
    "hours",
    "minutes",
    "seconds",
    "milliseconds",
    "microseconds",
    "nanoseconds",
]
# Short unit codes; these are the value type of `UNITS_DICT_TRUNCATE`.
TruncateUnit: TypeAlias = Literal[
    "Y", "Q", "M", "W", "D", "h", "m", "s", "ms", "us", "ns"
]
# Maps each Narwhals interval unit code to its `BucketUnit` name.
UNITS_DICT_BUCKET: Mapping[IntervalUnit, BucketUnit] = {
    "y": "years",
    "q": "quarters",
    "mo": "months",
    "d": "days",
    "h": "hours",
    "m": "minutes",
    "s": "seconds",
    "ms": "milliseconds",
    "us": "microseconds",
    "ns": "nanoseconds",
}
# Maps each Narwhals interval unit code to its `TruncateUnit` code.
# No key maps to "W".
UNITS_DICT_TRUNCATE: Mapping[IntervalUnit, TruncateUnit] = {
    "y": "Y",
    "q": "Q",
    "mo": "M",
    "d": "D",
    "h": "h",
    "m": "m",
    "s": "s",
    "ms": "ms",
    "us": "us",
    "ns": "ns",
}
def evaluate_exprs(df: IbisLazyFrame, /, *exprs: IbisExpr) -> list[tuple[str, ir.Value]]:
    """Evaluate `exprs` against `df` and pair every result with its output name.

    Names come from each expression's `_evaluate_output_names(df)` and are
    passed through `_alias_output_names` when that hook is not None.

    Raises:
        AssertionError: an expression yields a different number of names
            than results.
    """
    named_results: list[tuple[str, ir.Value]] = []
    for expr in exprs:
        values = expr(df)
        names = expr._evaluate_output_names(df)
        rename = expr._alias_output_names
        if rename is not None:
            names = rename(names)
        if len(names) != len(values):  # pragma: no cover
            msg = f"Internal error: got output names {names}, but only got {len(values)} results"
            raise AssertionError(msg)
        named_results += zip(names, values)
    return named_results
@lru_cache(maxsize=16)
def native_to_narwhals_dtype(ibis_dtype: IbisDataType, version: Version) -> DType:
    """Translate an Ibis data type into the matching dtype from `version.dtypes`.

    Scalar types are matched through ordered (predicate, dtype name) tables;
    timestamp, interval, array and struct types are handled individually,
    recursing into nested types. Anything unmatched becomes `Unknown`.

    Raises:
        NotImplementedError: an interval has a unit other than
            "ns", "us", "ms" or "s".
    """
    dtypes = version.dtypes

    # Checked before the parametrised types, in this order.
    leading_scalars = (
        ("is_int64", "Int64"),
        ("is_int32", "Int32"),
        ("is_int16", "Int16"),
        ("is_int8", "Int8"),
        ("is_uint64", "UInt64"),
        ("is_uint32", "UInt32"),
        ("is_uint16", "UInt16"),
        ("is_uint8", "UInt8"),
        ("is_boolean", "Boolean"),
        ("is_float64", "Float64"),
        ("is_float32", "Float32"),
        ("is_string", "String"),
        ("is_date", "Date"),
    )
    for predicate, dtype_name in leading_scalars:
        if getattr(ibis_dtype, predicate)():
            return getattr(dtypes, dtype_name)()

    if is_timestamp(ibis_dtype):
        _unit = cast("TimestampUnit", ibis_dtype.unit)
        return dtypes.Datetime(time_unit=_unit.value, time_zone=ibis_dtype.timezone)

    if is_interval(ibis_dtype):
        _time_unit = ibis_dtype.unit.value
        if _time_unit in {"ns", "us", "ms", "s"}:
            return dtypes.Duration(_time_unit)
        msg = f"Unsupported interval unit: {_time_unit}"  # pragma: no cover
        raise NotImplementedError(msg)  # pragma: no cover

    if is_array(ibis_dtype):
        inner = native_to_narwhals_dtype(ibis_dtype.value_type, version)
        size = ibis_dtype.length
        # A truthy length means a fixed-size array; otherwise a list.
        return dtypes.Array(inner, size) if size else dtypes.List(inner)

    if is_struct(ibis_dtype):
        struct_fields = []
        for name, dtype in ibis_dtype.items():
            struct_fields.append(
                dtypes.Field(name, native_to_narwhals_dtype(dtype, version))
            )
        return dtypes.Struct(struct_fields)

    # Checked after the parametrised types, in this order.
    trailing_scalars = (
        ("is_decimal", "Decimal"),  # pragma: no cover
        ("is_time", "Time"),
        ("is_binary", "Binary"),
    )
    for predicate, dtype_name in trailing_scalars:
        if getattr(ibis_dtype, predicate)():
            return getattr(dtypes, dtype_name)()

    return dtypes.Unknown()  # pragma: no cover
def is_timestamp(obj: IbisDataType) -> TypeIs[ibis_dtypes.Timestamp]:
    """Return `obj.is_timestamp()`, typed as a `TypeIs` guard for `Timestamp`."""
    return obj.is_timestamp()
def is_interval(obj: IbisDataType) -> TypeIs[ibis_dtypes.Interval]:
    """Return `obj.is_interval()`, typed as a `TypeIs` guard for `Interval`."""
    return obj.is_interval()
def is_array(obj: IbisDataType) -> TypeIs[ibis_dtypes.Array[Any]]:
    """Return `obj.is_array()`, typed as a `TypeIs` guard for `Array`."""
    return obj.is_array()
def is_struct(obj: IbisDataType) -> TypeIs[ibis_dtypes.Struct]:
    """Return `obj.is_struct()`, typed as a `TypeIs` guard for `Struct`."""
    return obj.is_struct()
def is_floating(obj: IbisDataType) -> TypeIs[ibis_dtypes.Floating]:
    """Return `obj.is_floating()`, typed as a `TypeIs` guard for `Floating`."""
    return obj.is_floating()
def narwhals_to_native_dtype(  # noqa: C901, PLR0912
    dtype: IntoDType, version: Version
) -> IbisDataType:
    """Translate a Narwhals dtype into the corresponding Ibis data type.

    `dtype` is compared against the dtypes of `version.dtypes` one by one,
    in the order written below; the first match decides the result. List,
    Struct and Array recurse into their inner dtypes.

    Raises:
        NotImplementedError: `dtype` is Int128, UInt128, Categorical or Enum.
        AssertionError: `dtype` matches none of the handled dtypes.
    """
    dtypes = version.dtypes
    if isinstance_or_issubclass(dtype, dtypes.Decimal):  # pragma: no cover
        return ibis_dtypes.Decimal()
    if isinstance_or_issubclass(dtype, dtypes.Float64):
        return ibis_dtypes.Float64()
    if isinstance_or_issubclass(dtype, dtypes.Float32):
        return ibis_dtypes.Float32()
    if isinstance_or_issubclass(dtype, dtypes.Int128):  # pragma: no cover
        msg = "Int128 not supported by Ibis"
        raise NotImplementedError(msg)
    if isinstance_or_issubclass(dtype, dtypes.Int64):
        return ibis_dtypes.Int64()
    if isinstance_or_issubclass(dtype, dtypes.Int32):
        return ibis_dtypes.Int32()
    if isinstance_or_issubclass(dtype, dtypes.Int16):
        return ibis_dtypes.Int16()
    if isinstance_or_issubclass(dtype, dtypes.Int8):
        return ibis_dtypes.Int8()
    if isinstance_or_issubclass(dtype, dtypes.UInt128):  # pragma: no cover
        msg = "UInt128 not supported by Ibis"
        raise NotImplementedError(msg)
    if isinstance_or_issubclass(dtype, dtypes.UInt64):
        return ibis_dtypes.UInt64()
    if isinstance_or_issubclass(dtype, dtypes.UInt32):
        return ibis_dtypes.UInt32()
    if isinstance_or_issubclass(dtype, dtypes.UInt16):
        return ibis_dtypes.UInt16()
    if isinstance_or_issubclass(dtype, dtypes.UInt8):
        return ibis_dtypes.UInt8()
    if isinstance_or_issubclass(dtype, dtypes.String):
        return ibis_dtypes.String()
    if isinstance_or_issubclass(dtype, dtypes.Boolean):
        return ibis_dtypes.Boolean()
    if isinstance_or_issubclass(dtype, dtypes.Categorical):
        msg = "Categorical not supported by Ibis"
        raise NotImplementedError(msg)
    if isinstance_or_issubclass(dtype, dtypes.Datetime):
        # Time unit and time zone are carried over from the Narwhals dtype.
        return ibis_dtypes.Timestamp.from_unit(dtype.time_unit, timezone=dtype.time_zone)
    if isinstance_or_issubclass(dtype, dtypes.Duration):
        return ibis_dtypes.Interval(unit=dtype.time_unit)  # pyright: ignore[reportArgumentType]
    if isinstance_or_issubclass(dtype, dtypes.Date):
        return ibis_dtypes.Date()
    if isinstance_or_issubclass(dtype, dtypes.Time):
        return ibis_dtypes.Time()
    if isinstance_or_issubclass(dtype, dtypes.List):
        # Variable-length list: an Ibis Array with no `length` given.
        inner = narwhals_to_native_dtype(dtype.inner, version)
        return ibis_dtypes.Array(value_type=inner)
    if isinstance_or_issubclass(dtype, dtypes.Struct):
        fields = [
            (field.name, narwhals_to_native_dtype(field.dtype, version))
            for field in dtype.fields
        ]
        return ibis_dtypes.Struct.from_tuples(fields)
    if isinstance_or_issubclass(dtype, dtypes.Array):
        # Fixed-size array: an Ibis Array with `length` set to `dtype.size`.
        inner = narwhals_to_native_dtype(dtype.inner, version)
        return ibis_dtypes.Array(value_type=inner, length=dtype.size)
    if isinstance_or_issubclass(dtype, dtypes.Binary):
        return ibis_dtypes.Binary()
    if isinstance_or_issubclass(dtype, dtypes.Enum):
        # Ibis does not support: https://github.com/ibis-project/ibis/issues/10991
        msg = "Enum not supported by Ibis"
        raise NotImplementedError(msg)
    msg = f"Unknown dtype: {dtype}"  # pragma: no cover
    raise AssertionError(msg)
def timedelta_to_ibis_interval(td: timedelta) -> ibis.expr.types.temporal.IntervalScalar:
    """Build an `ibis.interval` from the days, seconds and microseconds of `td`."""
    components = {
        "days": td.days,
        "seconds": td.seconds,
        "microseconds": td.microseconds,
    }
    return ibis.interval(**components)