Gestion_sondes/myenv/lib/python3.11/site-packages/narwhals/_ibis/expr.py

from __future__ import annotations

import operator
from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast

import ibis

from narwhals._compliant import LazyExpr, WindowInputs
from narwhals._expression_parsing import (
    combine_alias_output_names,
    combine_evaluate_output_names,
)
from narwhals._ibis.expr_dt import IbisExprDateTimeNamespace
from narwhals._ibis.expr_list import IbisExprListNamespace
from narwhals._ibis.expr_str import IbisExprStringNamespace
from narwhals._ibis.expr_struct import IbisExprStructNamespace
from narwhals._ibis.utils import is_floating, lit, narwhals_to_native_dtype
from narwhals._utils import Implementation, not_implemented

if TYPE_CHECKING:
    from collections.abc import Iterable, Iterator, Sequence

    import ibis.expr.types as ir
    from typing_extensions import Self

    from narwhals._compliant.typing import (
        AliasNames,
        EvalNames,
        EvalSeries,
        WindowFunction,
    )
    from narwhals._expression_parsing import ExprKind, ExprMetadata
    from narwhals._ibis.dataframe import IbisLazyFrame
    from narwhals._ibis.namespace import IbisNamespace
    from narwhals._utils import Version, _LimitedContext
    from narwhals.typing import IntoDType, RankMethod, RollingInterpolationMethod

    ExprT = TypeVar("ExprT", bound=ir.Value)
    IbisWindowFunction = WindowFunction[IbisLazyFrame, ir.Value]
    IbisWindowInputs = WindowInputs[ir.Value]


class IbisExpr(LazyExpr["IbisLazyFrame", "ir.Column"]):
    _implementation = Implementation.IBIS

    def __init__(
        self,
        call: EvalSeries[IbisLazyFrame, ir.Value],
        window_function: IbisWindowFunction | None = None,
        *,
        evaluate_output_names: EvalNames[IbisLazyFrame],
        alias_output_names: AliasNames | None,
        version: Version,
    ) -> None:
        self._call = call
        self._evaluate_output_names = evaluate_output_names
        self._alias_output_names = alias_output_names
        self._version = version
        self._metadata: ExprMetadata | None = None
        self._window_function: IbisWindowFunction | None = window_function

    @property
    def window_function(self) -> IbisWindowFunction:
        def default_window_func(
            df: IbisLazyFrame, window_inputs: IbisWindowInputs
        ) -> list[ir.Value]:
            return [
                expr.over(
                    ibis.window(
                        group_by=window_inputs.partition_by,
                        order_by=self._sort(*window_inputs.order_by),
                    )
                )
                for expr in self(df)
            ]

        return self._window_function or default_window_func

    def __call__(self, df: IbisLazyFrame) -> Sequence[ir.Value]:
        return self._call(df)

    def __narwhals_expr__(self) -> None: ...

    def __narwhals_namespace__(self) -> IbisNamespace:  # pragma: no cover
        from narwhals._ibis.namespace import IbisNamespace

        return IbisNamespace(version=self._version)

    def _cum_window_func(
        self, *, reverse: bool, func_name: Literal["sum", "max", "min", "count"]
    ) -> IbisWindowFunction:
        def func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            window = ibis.window(
                group_by=list(inputs.partition_by),
                order_by=self._sort(
                    *inputs.order_by, descending=reverse, nulls_last=reverse
                ),
                preceding=None,  # unbounded
                following=0,
            )

            return [getattr(expr, func_name)().over(window) for expr in self(df)]

        return func

    def _rolling_window_func(
        self,
        *,
        func_name: Literal["sum", "mean", "std", "var"],
        center: bool,
        window_size: int,
        min_samples: int,
        ddof: int | None = None,
    ) -> IbisWindowFunction:
        supported_funcs = ["sum", "mean", "std", "var"]

        if center:
            preceding = window_size // 2
            following = window_size - preceding - 1
        else:
            preceding = window_size - 1
            following = 0

        def func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            window = ibis.window(
                group_by=list(inputs.partition_by),
                order_by=self._sort(*inputs.order_by),
                preceding=preceding,
                following=following,
            )

            def inner_f(expr: ir.NumericColumn) -> ir.Value:
                if func_name in {"sum", "mean"}:
                    func_ = getattr(expr, func_name)()
                elif func_name == "var" and ddof == 0:
                    func_ = expr.var(how="pop")
                elif func_name in "var" and ddof == 1:
                    func_ = expr.var(how="sample")
                elif func_name == "std" and ddof == 0:
                    func_ = expr.std(how="pop")
                elif func_name == "std" and ddof == 1:
                    func_ = expr.std(how="sample")
                elif func_name in {"var", "std"}:  # pragma: no cover
                    msg = f"Only ddof=0 and ddof=1 are currently supported for rolling_{func_name}."
                    raise ValueError(msg)
                else:  # pragma: no cover
                    msg = f"Only the following functions are supported: {supported_funcs}.\nGot: {func_name}."
                    raise ValueError(msg)

                rolling_calc = func_.over(window)
                valid_count = expr.count().over(window)
                return ibis.cases(
                    (valid_count >= ibis.literal(min_samples), rolling_calc),
                    else_=ibis.null(),
                )

            return [inner_f(cast("ir.NumericColumn", expr)) for expr in self(df)]

        return func

    def broadcast(self, kind: Literal[ExprKind.AGGREGATION, ExprKind.LITERAL]) -> Self:
        # Ibis does its own broadcasting.
        return self

    def _sort(
        self, *cols: ir.Column | str, descending: bool = False, nulls_last: bool = False
    ) -> Iterator[ir.Column]:
        mapping = {
            (False, False): partial(ibis.asc, nulls_first=True),
            (False, True): partial(ibis.asc, nulls_first=False),
            (True, False): partial(ibis.desc, nulls_first=True),
            (True, True): partial(ibis.desc, nulls_first=False),
        }
        sort = mapping[(descending, nulls_last)]
        yield from (cast("ir.Column", sort(col)) for col in cols)

    @classmethod
    def from_column_names(
        cls: type[Self],
        evaluate_column_names: EvalNames[IbisLazyFrame],
        /,
        *,
        context: _LimitedContext,
    ) -> Self:
        def func(df: IbisLazyFrame) -> list[ir.Column]:
            return [df.native[name] for name in evaluate_column_names(df)]

        return cls(
            func,
            evaluate_output_names=evaluate_column_names,
            alias_output_names=None,
            version=context._version,
        )

    @classmethod
    def from_column_indices(cls, *column_indices: int, context: _LimitedContext) -> Self:
        def func(df: IbisLazyFrame) -> list[ir.Column]:
            return [df.native[i] for i in column_indices]

        return cls(
            func,
            evaluate_output_names=cls._eval_names_indices(column_indices),
            alias_output_names=None,
            version=context._version,
        )

    @classmethod
    def _from_elementwise_horizontal_op(
        cls, func: Callable[[Iterable[ir.Value]], ir.Value], *exprs: Self
    ) -> Self:
        def call(df: IbisLazyFrame) -> list[ir.Value]:
            cols = (col for _expr in exprs for col in _expr(df))
            return [func(cols)]

        context = exprs[0]
        return cls(
            call=call,
            evaluate_output_names=combine_evaluate_output_names(*exprs),
            alias_output_names=combine_alias_output_names(*exprs),
            version=context._version,
        )

    def _with_callable(
        self, call: Callable[..., ir.Value], /, **expressifiable_args: Self | Any
    ) -> Self:
        """Create expression from callable.

        Arguments:
            call: Callable from compliant DataFrame to native Expression
            expr_name: Expression name
            expressifiable_args: arguments pass to expression which should be parsed
                as expressions (e.g. in `nw.col('a').is_between('b', 'c')`)
        """

        def func(df: IbisLazyFrame) -> list[ir.Value]:
            native_series_list = self(df)
            other_native_series = {
                key: df._evaluate_expr(value) if self._is_expr(value) else lit(value)
                for key, value in expressifiable_args.items()
            }
            return [
                call(native_series, **other_native_series)
                for native_series in native_series_list
            ]

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )

    def _with_binary(self, op: Callable[..., ir.Value], other: Self | Any) -> Self:
        return self._with_callable(op, other=other)

    def _with_alias_output_names(self, func: AliasNames | None, /) -> Self:
        return type(self)(
            self._call,
            self._window_function,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=func,
            version=self._version,
        )

    def _with_window_function(self, window_function: IbisWindowFunction) -> Self:
        return self.__class__(
            self._call,
            window_function,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )

    @classmethod
    def _alias_native(cls, expr: ExprT, name: str, /) -> ExprT:
        return cast("ExprT", expr.name(name))

    def __invert__(self) -> Self:
        invert = cast("Callable[..., ir.Value]", operator.invert)
        return self._with_callable(invert)

    def abs(self) -> Self:
        return self._with_callable(lambda expr: expr.abs())

    def mean(self) -> Self:
        return self._with_callable(lambda expr: expr.mean())

    def median(self) -> Self:
        return self._with_callable(lambda expr: expr.median())

    def all(self) -> Self:
        return self._with_callable(lambda expr: expr.all().fill_null(lit(True)))  # noqa: FBT003

    def any(self) -> Self:
        return self._with_callable(lambda expr: expr.any().fill_null(lit(False)))  # noqa: FBT003

    def quantile(
        self, quantile: float, interpolation: RollingInterpolationMethod
    ) -> Self:
        if interpolation != "linear":
            msg = "Only linear interpolation methods are supported for Ibis quantile."
            raise NotImplementedError(msg)
        return self._with_callable(lambda expr: expr.quantile(quantile))

    def clip(self, lower_bound: Any, upper_bound: Any) -> Self:
        def _clip(
            expr: ir.NumericValue, lower: Any | None = None, upper: Any | None = None
        ) -> ir.NumericValue:
            return expr.clip(lower=lower, upper=upper)

        if lower_bound is None:
            return self._with_callable(_clip, upper=upper_bound)
        if upper_bound is None:
            return self._with_callable(_clip, lower=lower_bound)
        return self._with_callable(_clip, lower=lower_bound, upper=upper_bound)

    def sum(self) -> Self:
        return self._with_callable(lambda expr: expr.sum().fill_null(lit(0)))

    def n_unique(self) -> Self:
        return self._with_callable(
            lambda expr: expr.nunique() + expr.isnull().any().cast("int8")
        )

    def count(self) -> Self:
        return self._with_callable(lambda expr: expr.count())

    def len(self) -> Self:
        def func(df: IbisLazyFrame) -> list[ir.IntegerScalar]:
            return [df.native.count()]

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )

    def std(self, ddof: int) -> Self:
        def _std(expr: ir.NumericColumn, ddof: int) -> ir.Value:
            if ddof == 0:
                return expr.std(how="pop")
            elif ddof == 1:
                return expr.std(how="sample")
            else:
                n_samples = expr.count()
                std_pop = expr.std(how="pop")
                ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof))
                return std_pop * n_samples.sqrt() / (n_samples - ddof_lit).sqrt()

        return self._with_callable(lambda expr: _std(expr, ddof))

    def var(self, ddof: int) -> Self:
        def _var(expr: ir.NumericColumn, ddof: int) -> ir.Value:
            if ddof == 0:
                return expr.var(how="pop")
            elif ddof == 1:
                return expr.var(how="sample")
            else:
                n_samples = expr.count()
                var_pop = expr.var(how="pop")
                ddof_lit = cast("ir.IntegerScalar", ibis.literal(ddof))
                return var_pop * n_samples / (n_samples - ddof_lit)

        return self._with_callable(lambda expr: _var(expr, ddof))

    def max(self) -> Self:
        return self._with_callable(lambda expr: expr.max())

    def min(self) -> Self:
        return self._with_callable(lambda expr: expr.min())

    def null_count(self) -> Self:
        return self._with_callable(lambda expr: expr.isnull().sum())

    def over(self, partition_by: Sequence[str], order_by: Sequence[str]) -> Self:
        def func(df: IbisLazyFrame) -> Sequence[ir.Value]:
            return self.window_function(df, WindowInputs(partition_by, order_by))

        return self.__class__(
            func,
            evaluate_output_names=self._evaluate_output_names,
            alias_output_names=self._alias_output_names,
            version=self._version,
        )

    def is_null(self) -> Self:
        return self._with_callable(lambda expr: expr.isnull())

    def is_nan(self) -> Self:
        def func(expr: ir.FloatingValue | Any) -> ir.Value:
            otherwise = expr.isnan() if is_floating(expr.type()) else False
            return ibis.ifelse(expr.isnull(), None, otherwise)

        return self._with_callable(func)

    def is_finite(self) -> Self:
        return self._with_callable(lambda expr: ~(expr.isinf() | expr.isnan()))

    def is_in(self, other: Sequence[Any]) -> Self:
        return self._with_callable(lambda expr: expr.isin(other))

    def round(self, decimals: int) -> Self:
        return self._with_callable(lambda expr: expr.round(decimals))

    def shift(self, n: int) -> Self:
        def _func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            return [
                expr.lag(n).over(  # type: ignore[attr-defined, unused-ignore]
                    ibis.window(
                        group_by=inputs.partition_by,
                        order_by=self._sort(*inputs.order_by),
                    )
                )
                for expr in self(df)
            ]

        return self._with_window_function(_func)

    def is_first_distinct(self) -> Self:
        def func(
            df: IbisLazyFrame, inputs: IbisWindowInputs
        ) -> Sequence[ir.BooleanValue]:
            # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1`
            return [
                ibis.row_number().over(
                    ibis.window(
                        group_by=[*inputs.partition_by, expr],
                        order_by=self._sort(*inputs.order_by),
                    )
                )
                == lit(0)
                for expr in self(df)
            ]

        return self._with_window_function(func)

    def is_last_distinct(self) -> Self:
        def func(
            df: IbisLazyFrame, inputs: IbisWindowInputs
        ) -> Sequence[ir.BooleanValue]:
            # ibis row_number starts at 0, so need to compare with 0 instead of the usual `1`
            return [
                ibis.row_number().over(
                    ibis.window(
                        group_by=[*inputs.partition_by, expr],
                        order_by=self._sort(
                            *inputs.order_by, descending=True, nulls_last=True
                        ),
                    )
                )
                == lit(0)
                for expr in self(df)
            ]

        return self._with_window_function(func)

    def diff(self) -> Self:
        def _func(df: IbisLazyFrame, inputs: IbisWindowInputs) -> Sequence[ir.Value]:
            return [
                expr
                - expr.lag().over(  # type: ignore[attr-defined, unused-ignore]
                    ibis.window(
                        following=0,
                        group_by=inputs.partition_by,
                        order_by=self._sort(*inputs.order_by),
                    )
                )
                for expr in self(df)
            ]

        return self._with_window_function(_func)

    def cum_sum(self, *, reverse: bool) -> Self:
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="sum")
        )

    def cum_max(self, *, reverse: bool) -> Self:
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="max")
        )

    def cum_min(self, *, reverse: bool) -> Self:
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="min")
        )

    def cum_count(self, *, reverse: bool) -> Self:
        return self._with_window_function(
            self._cum_window_func(reverse=reverse, func_name="count")
        )

    def rolling_sum(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._with_window_function(
            self._rolling_window_func(
                func_name="sum",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
            )
        )

    def rolling_mean(self, window_size: int, *, min_samples: int, center: bool) -> Self:
        return self._with_window_function(
            self._rolling_window_func(
                func_name="mean",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
            )
        )

    def rolling_var(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._with_window_function(
            self._rolling_window_func(
                func_name="var",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
                ddof=ddof,
            )
        )

    def rolling_std(
        self, window_size: int, *, min_samples: int, center: bool, ddof: int
    ) -> Self:
        return self._with_window_function(
            self._rolling_window_func(
                func_name="std",
                center=center,
                window_size=window_size,
                min_samples=min_samples,
                ddof=ddof,
            )
        )

    def fill_null(self, value: Self | Any, strategy: Any, limit: int | None) -> Self:
        # Ibis doesn't yet allow ignoring nulls in first/last with window functions, which makes forward/backward
        # strategies inconsistent when there are nulls present: https://github.com/ibis-project/ibis/issues/9539
        if strategy is not None:
            msg = "`strategy` is not supported for the Ibis backend"
            raise NotImplementedError(msg)
        if limit is not None:
            msg = "`limit` is not supported for the Ibis backend"  # pragma: no cover
            raise NotImplementedError(msg)

        def _fill_null(expr: ir.Value, value: ir.Scalar) -> ir.Value:
            return expr.fill_null(value)

        return self._with_callable(_fill_null, value=value)

    def cast(self, dtype: IntoDType) -> Self:
        def _func(expr: ir.Column) -> ir.Value:
            native_dtype = narwhals_to_native_dtype(dtype, self._version)
            # ibis `cast` overloads do not include DataType, only literals
            return expr.cast(native_dtype)  # type: ignore[unused-ignore]

        return self._with_callable(_func)

    def is_unique(self) -> Self:
        return self._with_callable(
            lambda expr: expr.isnull().count().over(ibis.window(group_by=(expr))) == 1
        )

    def rank(self, method: RankMethod, *, descending: bool) -> Self:
        def _rank(expr: ir.Column) -> ir.Column:
            order_by = next(self._sort(expr, descending=descending, nulls_last=True))
            window = ibis.window(order_by=order_by)

            if method == "dense":
                rank_ = order_by.dense_rank()
            elif method == "ordinal":
                rank_ = cast("ir.IntegerColumn", ibis.row_number().over(window))
            else:
                rank_ = order_by.rank()

            # Ibis uses 0-based ranking. Add 1 to match polars 1-based rank.
            rank_ = rank_ + cast("ir.IntegerValue", lit(1))

            # For "max" and "average", adjust using the count of rows in the partition.
            if method == "max":
                # Define a window partitioned by expr (i.e. each distinct value)
                partition = ibis.window(group_by=[expr])
                cnt = cast("ir.IntegerValue", expr.count().over(partition))
                rank_ = rank_ + cnt - cast("ir.IntegerValue", lit(1))
            elif method == "average":
                partition = ibis.window(group_by=[expr])
                cnt = cast("ir.IntegerValue", expr.count().over(partition))
                avg = cast(
                    "ir.NumericValue", (cnt - cast("ir.IntegerScalar", lit(1))) / lit(2.0)
                )
                rank_ = rank_ + avg

            return cast("ir.Column", ibis.cases((expr.notnull(), rank_)))

        return self._with_callable(_rank)

    def log(self, base: float) -> Self:
        def _log(expr: ir.NumericColumn) -> ir.Value:
            otherwise = expr.log(cast("ir.NumericValue", lit(base)))
            return ibis.cases(
                (expr < lit(0), lit(float("nan"))),
                (expr == lit(0), lit(float("-inf"))),
                else_=otherwise,
            )

        return self._with_callable(_log)

    def exp(self) -> Self:
        def _exp(expr: ir.NumericColumn) -> ir.Value:
            return expr.exp()

        return self._with_callable(_exp)

    def sqrt(self) -> Self:
        def _sqrt(expr: ir.NumericColumn) -> ir.Value:
            return ibis.cases((expr < lit(0), lit(float("nan"))), else_=expr.sqrt())

        return self._with_callable(_sqrt)

    @property
    def str(self) -> IbisExprStringNamespace:
        return IbisExprStringNamespace(self)

    @property
    def dt(self) -> IbisExprDateTimeNamespace:
        return IbisExprDateTimeNamespace(self)

    @property
    def list(self) -> IbisExprListNamespace:
        return IbisExprListNamespace(self)

    @property
    def struct(self) -> IbisExprStructNamespace:
        return IbisExprStructNamespace(self)

    # NOTE: https://github.com/ibis-project/ibis/issues/10542
    cum_prod = not_implemented()
    drop_nulls = not_implemented()

    # NOTE: https://github.com/ibis-project/ibis/issues/11176
    skew = not_implemented()
    kurtosis = not_implemented()
    unique = not_implemented()