Mise à jour de Monitor.py et autres scripts

This commit is contained in:
Debian
2025-07-23 10:46:27 +02:00
parent 7081418ce0
commit 7de3e0fb50
8604 changed files with 2789953 additions and 295 deletions

View File

@@ -0,0 +1,37 @@
from .core import (
SHORTHAND_KEYS,
display_traceback,
infer_encoding_types,
infer_vegalite_type_for_pandas,
parse_shorthand,
sanitize_narwhals_dataframe,
sanitize_pandas_dataframe,
update_nested,
use_signature,
)
from .deprecation import AltairDeprecationWarning, deprecated, deprecated_warn
from .html import spec_to_html
from .plugin_registry import PluginRegistry
from .schemapi import Optional, SchemaBase, SchemaLike, Undefined, is_undefined
# Public API of this package. Every name listed here is imported at the top
# of this module from one of the sibling submodules (core, deprecation, html,
# plugin_registry, schemapi) and is what ``from <package> import *`` exposes.
__all__ = (
    "SHORTHAND_KEYS",
    "AltairDeprecationWarning",
    "Optional",
    "PluginRegistry",
    "SchemaBase",
    "SchemaLike",
    "Undefined",
    "deprecated",
    "deprecated_warn",
    "display_traceback",
    "infer_encoding_types",
    "infer_vegalite_type_for_pandas",
    "is_undefined",
    "parse_shorthand",
    "sanitize_narwhals_dataframe",
    "sanitize_pandas_dataframe",
    "spec_to_html",
    "update_nested",
    "use_signature",
)

View File

@@ -0,0 +1,164 @@
# DataFrame Interchange Protocol Types
# Copied from https://data-apis.org/dataframe-protocol/latest/API.html,
# changed ABCs to Protocols, and subset the type hints to only those that are
# relevant for Altair.
#
# These classes are only for use in type signatures
from __future__ import annotations
import enum
from typing import TYPE_CHECKING, Any, Protocol
if TYPE_CHECKING:
from collections.abc import Iterable
class DtypeKind(enum.IntEnum):
    """
    Integer enum for data types.

    The numeric value of each member is the "kind" code that appears as the
    first element of the dtype tuple described by ``Column.dtype``.

    Attributes
    ----------
    INT : int
        Matches to signed integer data type.
    UINT : int
        Matches to unsigned integer data type.
    FLOAT : int
        Matches to floating point data type.
    BOOL : int
        Matches to boolean data type.
    STRING : int
        Matches to string data type (UTF-8 encoded).
    DATETIME : int
        Matches to datetime data type.
    CATEGORICAL : int
        Matches to categorical data type.
    """

    INT = 0
    UINT = 1
    FLOAT = 2
    # The numbering deliberately jumps to 20 here (see the notes in the
    # ``Column.dtype`` docstring: room is left for future extension).
    BOOL = 20
    STRING = 21  # UTF-8
    DATETIME = 22
    CATEGORICAL = 23
# Type hint of first element would actually be DtypeKind but can't use that
# as other libraries won't use an instance of our own Enum in this module but have
# their own. Type checkers will raise an error on that even though the enums
# are identical.
class Column(Protocol):
    """
    Structural type for a single column of an interchange-protocol data frame.

    Only the two properties that this module's type signatures need are
    declared; the class is meant for type checking, not for instantiation.
    """

    @property
    def dtype(self) -> tuple[Any, int, str, str]:
        """
        Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.

        Bit-width : the number of bits as an integer
        Format string : data type description format string in Apache Arrow C
                        Data Interface format.
        Endianness : currently only native endianness (``=``) is supported

        Notes
        -----
        - Kind specifiers are aligned with DLPack where possible (hence the
          jump to 20, leave enough room for future extension)
        - Masks must be specified as boolean with either bit width 1 (for bit
          masks) or 8 (for byte masks).
        - Dtype width in bits was preferred over bytes
        - Endianness isn't too useful, but included now in case in the future
          we need to support non-native endianness
        - Went with Apache Arrow format strings over NumPy format strings
          because they're more complete from a dataframe perspective
        - Format strings are mostly useful for datetime specification, and
          for categoricals.
        - For categoricals, the format string describes the type of the
          categorical in the data buffer. In case of a separate encoding of
          the categorical (e.g. an integer to string mapping), this can
          be derived from ``self.describe_categorical``.
        - Data types not included: complex, Arrow-style null, binary, decimal,
          and nested (list, struct, map, union) dtypes.
        """
        ...

    # Have to use a generic Any return type as not all libraries who implement
    # the dataframe interchange protocol implement the TypedDict that is usually
    # returned here in the same way. As TypedDicts are invariant, even a slight change
    # will lead to an error by a type checker. See PR in which this code was added
    # for details.
    @property
    def describe_categorical(self) -> Any:
        """
        If the dtype is categorical, there are two options.

        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding categorical values.

        Raises TypeError if the dtype is not categorical

        Returns the dictionary with description on how to interpret the data buffer:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.

        TBD: are there any other in-memory representations that are needed?
        """
        ...
class DataFrame(Protocol):
    """
    A data frame class, with only the methods required by the interchange protocol defined.

    A "data frame" represents an ordered collection of named columns.
    A column's "name" must be a unique string.
    Columns may be accessed by name or by position.

    This could be a public data frame class, or an object with the methods and
    attributes defined on this DataFrame class could be returned from the
    ``__dataframe__`` method of a public data frame class in a library adhering
    to the dataframe interchange protocol specification.
    """

    def __dataframe__(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ) -> DataFrame:
        """
        Construct a new exchange object, potentially changing the parameters.

        ``nan_as_null`` is a keyword intended for the consumer to tell the
        producer to overwrite null values in the data with ``NaN``.
        It is intended for cases where the consumer does not support the bit
        mask or byte mask that is the producer's native representation.
        ``allow_copy`` is a keyword that defines whether or not the library is
        allowed to make a copy of the data. For example, copying data would be
        necessary if a library supports strided buffers, given that this protocol
        specifies contiguous buffers.
        """
        ...

    def column_names(self) -> Iterable[str]:
        """Return an iterator yielding the column names."""
        ...

    def get_column_by_name(self, name: str) -> Column:
        """Return the column whose name is the indicated name."""
        ...

    def get_chunks(self, n_chunks: int | None = None) -> Iterable[DataFrame]:
        """
        Return an iterator yielding the chunks.

        By default (None), yields the chunks that the data is stored as by the
        producer. If given, ``n_chunks`` must be a multiple of
        ``self.num_chunks()``, meaning the producer must subdivide each chunk
        before yielding it.

        Note that the producer must ensure that all columns are chunked the
        same way.

        NOTE(review): ``num_chunks()`` is referenced above but is not declared
        on this trimmed-down protocol class; presumably it is part of the full
        interchange specification -- confirm before relying on it here.
        """
        ...

View File

@@ -0,0 +1,113 @@
from __future__ import annotations
from importlib.metadata import version as importlib_version
from typing import TYPE_CHECKING
from packaging.version import Version
if TYPE_CHECKING:
from types import ModuleType
def import_vegafusion() -> ModuleType:
    """
    Import and return the ``vegafusion`` module after validating its version.

    For VegaFusion 2.0 (``2.0.0a0`` and later) the module is returned as is.
    For older releases the companion ``vegafusion-python-embed`` distribution
    must be installed with exactly the same version string, and that version
    must be at least 1.5.0.

    Returns
    -------
    ModuleType
        The imported ``vegafusion`` module.

    Raises
    ------
    RuntimeError
        If a pre-2.0 installation has mismatching package versions or is
        older than the minimum supported version.
    ImportError
        If ``vegafusion`` (or, before 2.0, ``vegafusion-python-embed``) cannot
        be found; the message contains installation instructions.
    """
    min_version = "1.5.0"
    try:
        import vegafusion as vf

        version = importlib_version("vegafusion")
        # Only pre-2.0 releases ship the separate vegafusion-python-embed
        # package, so only they need the pairing check below.
        if Version(version) < Version("2.0.0a0"):
            embed_version = importlib_version("vegafusion-python-embed")
            compatible = version == embed_version and not (
                Version(version) < Version(min_version)
            )
            if not compatible:
                msg = (
                    "The versions of the vegafusion and vegafusion-python-embed packages must match\n"
                    f"and must be version {min_version} or greater.\n"
                    f"Found:\n"
                    f" - vegafusion=={version}\n"
                    f" - vegafusion-python-embed=={embed_version}\n"
                )
                raise RuntimeError(msg)
    except ImportError as err:
        msg = (
            'The "vegafusion" data transformer and chart.transformed_data feature requires\n'
            f"version {min_version} or greater of the 'vegafusion-python-embed' and 'vegafusion' packages.\n"
            "These can be installed with pip using:\n"
            f' pip install "vegafusion[embed]>={min_version}"\n'
            "Or with conda using:\n"
            f' conda install -c conda-forge "vegafusion-python-embed>={min_version}" '
            f'"vegafusion>={min_version}"\n\n'
            f"ImportError: {err.args[0]}"
        )
        raise ImportError(msg) from err
    else:
        return vf
def import_vl_convert() -> ModuleType:
    """
    Import and return the ``vl_convert`` module after validating its version.

    The installed ``vl-convert-python`` distribution must be version 1.6.0
    or newer.

    Returns
    -------
    ModuleType
        The imported ``vl_convert`` module.

    Raises
    ------
    RuntimeError
        If the installed distribution is older than the minimum version.
    ImportError
        If the distribution or module cannot be found; the message contains
        installation instructions.
    """
    min_version = "1.6.0"
    try:
        version = importlib_version("vl-convert-python")
        outdated = Version(version) < Version(min_version)
        if outdated:
            msg = (
                f"The vl-convert-python package must be version {min_version} or greater. "
                f"Found version {version}"
            )
            raise RuntimeError(msg)
        import vl_convert as vlc
    except ImportError as err:
        msg = (
            f"The vl-convert Vega-Lite compiler and file export feature requires\n"
            f"version {min_version} or greater of the 'vl-convert-python' package. \n"
            f"This can be installed with pip using:\n"
            f' pip install "vl-convert-python>={min_version}"\n'
            "or conda:\n"
            f' conda install -c conda-forge "vl-convert-python>={min_version}"\n\n'
            f"ImportError: {err.args[0]}"
        )
        raise ImportError(msg) from err
    else:
        return vlc
def vl_version_for_vl_convert() -> str:
    """
    Return the Vega-Lite version string in the form VlConvert expects.

    ``SCHEMA_VERSION`` has the form ``'v5.2.0'``; VlConvert wants only the
    first two dot-separated components joined by an underscore, e.g. ``'v5_2'``.
    """
    from altair.vegalite import SCHEMA_VERSION

    # Only the leading two components matter; anything after the second dot
    # ends up in the discarded third element.
    components = SCHEMA_VERSION.split(".", 2)
    leading_two = components[:2]
    return "_".join(leading_two)
def import_pyarrow_interchange() -> ModuleType:
    """
    Import and return ``pyarrow.interchange`` after validating the pyarrow version.

    The installed ``pyarrow`` distribution must be version 11.0.0 or newer.

    Returns
    -------
    ModuleType
        The imported ``pyarrow.interchange`` module.

    Raises
    ------
    RuntimeError
        If the installed pyarrow is older than the minimum version.
    ImportError
        If pyarrow cannot be found; the message contains installation
        instructions.
    """
    min_version = "11.0.0"
    try:
        version = importlib_version("pyarrow")
        outdated = Version(version) < Version(min_version)
        if outdated:
            msg = (
                f"The pyarrow package must be version {min_version} or greater. "
                f"Found version {version}"
            )
            raise RuntimeError(msg)
        import pyarrow.interchange as pi
    except ImportError as err:
        msg = (
            f"Usage of the DataFrame Interchange Protocol requires\n"
            f"version {min_version} or greater of the pyarrow package. \n"
            f"This can be installed with pip using:\n"
            f' pip install "pyarrow>={min_version}"\n'
            "or conda:\n"
            f' conda install -c conda-forge "pyarrow>={min_version}"\n\n'
            f"ImportError: {err.args[0]}"
        )
        raise ImportError(msg) from err
    else:
        return pi
def pyarrow_available() -> bool:
    """
    Report whether a usable ``pyarrow.interchange`` can be imported.

    Returns ``False`` when ``import_pyarrow_interchange`` raises either
    ``ImportError`` or ``RuntimeError``, and ``True`` otherwise.
    """
    try:
        import_pyarrow_interchange()
    except (ImportError, RuntimeError):
        return False
    return True

View File

@@ -0,0 +1,75 @@
from __future__ import annotations
import webbrowser
from http.server import BaseHTTPRequestHandler, HTTPServer
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from collections.abc import Iterable
def open_html_in_browser(
    html: str | bytes,
    using: str | Iterable[str] | None = None,
    port: int | None = None,
) -> None:
    """
    Display an html document in a web browser without creating a temp file.

    Instantiates a simple http server on 127.0.0.1, uses the webbrowser
    module to open the server's URL, serves exactly one request and then
    closes the server socket again.

    Parameters
    ----------
    html: str or bytes
        HTML document to display. A ``str`` is encoded as UTF-8; ``bytes``
        are sent unchanged.
    using: str or iterable of str
        Name of the web browser to open (e.g. "chrome", "firefox", etc.).
        If an iterable, choose the first browser available on the system.
        If none, choose the system default browser.
    port: int
        Port to use. Defaults to a random port

    Raises
    ------
    ValueError
        If ``using`` is given and none of the named browsers can be located.
    """
    # Encode html to bytes
    html_bytes = html.encode("utf8") if isinstance(html, str) else html

    browser = None
    if using is None:
        browser = webbrowser.get(None)
    else:
        # normalize using to an iterable
        if isinstance(using, str):
            using = [using]
        for browser_key in using:
            try:
                browser = webbrowser.get(browser_key)
            except webbrowser.Error:
                # This candidate is not available; try the next one.
                continue
            if browser is not None:
                break
        if browser is None:
            raise ValueError("Failed to locate a browser with name in " + str(using))

    class OneShotRequestHandler(BaseHTTPRequestHandler):
        def do_GET(self) -> None:
            self.send_response(200)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            # Write in 1 MiB slices rather than one potentially huge write.
            bufferSize = 1024 * 1024
            for i in range(0, len(html_bytes), bufferSize):
                self.wfile.write(html_bytes[i : i + bufferSize])

        def log_message(self, format, *args):
            # Silence stderr logging
            pass

    # Use specified port if provided, otherwise choose a random port (port value of 0).
    # The server is used as a context manager so that its listening socket is
    # always released, even if opening the browser or handling the request
    # raises; previously the socket was left open after the call returned.
    with HTTPServer(
        ("127.0.0.1", port if port is not None else 0), OneShotRequestHandler
    ) as server:
        browser.open(f"http://127.0.0.1:{server.server_port}")
        server.handle_request()

View File

@@ -0,0 +1,567 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, overload
from altair import (
Chart,
ConcatChart,
ConcatSpecGenericSpec,
FacetChart,
FacetedUnitSpec,
FacetSpec,
HConcatChart,
HConcatSpecGenericSpec,
LayerChart,
LayerSpec,
NonNormalizedSpec,
TopLevelConcatSpec,
TopLevelFacetSpec,
TopLevelHConcatSpec,
TopLevelLayerSpec,
TopLevelUnitSpec,
TopLevelVConcatSpec,
UnitSpec,
UnitSpecWithFrame,
VConcatChart,
VConcatSpecGenericSpec,
data_transformers,
)
from altair.utils._vegafusion_data import get_inline_tables, import_vegafusion
from altair.utils.schemapi import Undefined
if TYPE_CHECKING:
import sys
from collections.abc import Iterable
if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
from altair.typing import ChartType
from altair.utils.core import DataFrameLike
Scope: TypeAlias = tuple[int, ...]
FacetMapping: TypeAlias = dict[tuple[str, Scope], tuple[str, Scope]]
# For the transformed_data functionality, the chart classes in the values
# can be considered equivalent to the chart class in the key.
# Each value is a tuple so that it can be passed directly as the second
# argument of ``isinstance`` (see ``name_views``).
_chart_class_mapping = {
    # Single-view charts and the unit-level spec classes
    Chart: (
        Chart,
        TopLevelUnitSpec,
        FacetedUnitSpec,
        UnitSpec,
        UnitSpecWithFrame,
        NonNormalizedSpec,
    ),
    # Composite charts and their corresponding spec classes
    LayerChart: (LayerChart, TopLevelLayerSpec, LayerSpec),
    ConcatChart: (ConcatChart, TopLevelConcatSpec, ConcatSpecGenericSpec),
    HConcatChart: (HConcatChart, TopLevelHConcatSpec, HConcatSpecGenericSpec),
    VConcatChart: (VConcatChart, TopLevelVConcatSpec, VConcatSpecGenericSpec),
    FacetChart: (FacetChart, TopLevelFacetSpec, FacetSpec),
}
@overload
def transformed_data(
    chart: Chart | FacetChart,
    row_limit: int | None = None,
    exclude: Iterable[str] | None = None,
) -> DataFrameLike | None: ...
@overload
def transformed_data(
    chart: LayerChart | HConcatChart | VConcatChart | ConcatChart,
    row_limit: int | None = None,
    exclude: Iterable[str] | None = None,
) -> list[DataFrameLike]: ...
def transformed_data(chart, row_limit=None, exclude=None):
    """
    Evaluate a Chart's transforms.

    The chart is compiled to Vega with the "vegafusion" data transformer
    enabled, and VegaFusion is asked for the transformed dataset behind each
    (non-excluded) view.

    Parameters
    ----------
    chart : Chart, FacetChart, LayerChart, HConcatChart, VConcatChart, or ConcatChart
        Altair chart to evaluate transforms on
    row_limit : int (optional)
        Maximum number of rows to return for each DataFrame. None (default) for unlimited
    exclude : iterable of str
        Set of the names of charts to exclude

    Returns
    -------
    DataFrame or list of DataFrames or None
        For a Chart or FacetChart, the single transformed DataFrame (or None
        when no dataset was produced, e.g. because the chart was excluded).
        For every other chart type, the list of transformed DataFrames.

    Raises
    ------
    ValueError
        If a named view cannot be matched to a dataset in the Vega spec.
    """
    vf = import_vegafusion()

    # Vega-Lite requires a mark, so fall back to a point mark when none is set
    if isinstance(chart, Chart) and chart.mark == Undefined:
        chart = chart.mark_point()

    # Work on a deep copy: naming the views below must not leak to the caller
    chart = chart.copy(deep=True)

    # Every view needs a name so it can be found again in the Vega spec
    chart_names = name_views(chart, 0, exclude=exclude)

    # Compile to Vega and collect the inline DataFrames that were extracted
    with data_transformers.enable("vegafusion"):
        vega_spec = chart.to_dict(format="vega", context={"pre_transform": False})
        inline_datasets = get_inline_tables(vega_spec)

    # Map view names to the scoped Vega datasets that feed them
    facet_mapping = get_facet_mapping(vega_spec)
    dataset_mapping = get_datasets_for_view_names(vega_spec, chart_names, facet_mapping)

    # The requested datasets must follow the order of the chart components
    if not all(view_name in dataset_mapping for view_name in chart_names):
        msg = "Failed to locate all datasets"
        raise ValueError(msg)
    dataset_names = [dataset_mapping[view_name] for view_name in chart_names]

    # Let VegaFusion evaluate the transforms
    datasets, _ = vf.runtime.pre_transform_datasets(
        vega_spec,
        dataset_names,
        row_limit=row_limit,
        inline_datasets=inline_datasets,
    )

    if not isinstance(chart, (Chart, FacetChart)):
        # Composite charts: hand back every DataFrame
        return datasets
    # Simple charts: a single DataFrame, or None if it was excluded
    return datasets[0] if datasets else None
# The equivalent classes from _chart_class_mapping should also be added
# to the type hints below for `chart` as the function would also work for them.
# However, this was not possible so far as mypy then complains about
# "Overloaded function signatures 1 and 2 overlap with incompatible return types [misc]"
# This might be due to the complex type hierarchy of the chart classes.
# See also https://github.com/python/mypy/issues/5119
# and https://github.com/python/mypy/issues/4020 which show that mypy might not have
# a very consistent behavior for overloaded functions.
# The same error appeared when trying it with Protocols for the concat and layer charts.
# This function is only used internally and so we accept this inconsistency for now.
def name_views(
    chart: ChartType, i: int = 0, exclude: Iterable[str] | None = None
) -> list[str]:
    """
    Name unnamed chart views.

    Walks the chart and gives every unnamed leaf view a generated name so
    that the views can be located later in the compiled Vega spec.

    Note: This function mutates the input chart by applying names to
    unnamed views.

    Parameters
    ----------
    chart : Chart, FacetChart, LayerChart, HConcatChart, VConcatChart, or ConcatChart
        Altair chart to apply names to
    i : int (default 0)
        Starting chart index
    exclude : iterable of str
        Names of charts to exclude

    Returns
    -------
    list of str
        List of the names of the charts and subcharts

    Raises
    ------
    ValueError
        If ``chart`` is not one of the supported chart types.
    """
    exclude = set(exclude) if exclude is not None else set()

    leaf_classes = (_chart_class_mapping[Chart], _chart_class_mapping[FacetChart])
    if isinstance(chart, leaf_classes):
        if chart.name in exclude:
            return []
        if chart.name in {None, Undefined}:
            # Add name since none is specified
            chart.name = Chart._get_name()
        return [chart.name]

    # Composite chart: find the attribute that holds its sub-charts
    subcharts: list[Any]
    composite_kinds = (
        (LayerChart, "layer"),
        (HConcatChart, "hconcat"),
        (VConcatChart, "vconcat"),
        (ConcatChart, "concat"),
    )
    for chart_class, children_attr in composite_kinds:
        if isinstance(chart, _chart_class_mapping[chart_class]):
            subcharts = getattr(chart, children_attr)
            break
    else:
        msg = (
            "transformed_data accepts an instance of "
            "Chart, FacetChart, LayerChart, HConcatChart, VConcatChart, or ConcatChart\n"
            f"Received value of type: {type(chart)}"
        )
        raise ValueError(msg)

    chart_names: list[str] = []
    for subchart in subcharts:
        chart_names.extend(
            name_views(subchart, i=i + len(chart_names), exclude=exclude)
        )
    return chart_names
def get_group_mark_for_scope(
    vega_spec: dict[str, Any], scope: Scope
) -> dict[str, Any] | None:
    """
    Get the group mark at a particular scope.

    Each entry of ``scope`` selects, at that nesting level, the n-th mark
    whose ``"type"`` is ``"group"`` (marks of other types are not counted).

    Parameters
    ----------
    vega_spec : dict
        Top-level Vega specification dictionary
    scope : tuple of int
        Scope tuple. If empty, the original Vega specification is returned.
        Otherwise, the nested group mark at the scope specified is returned.

    Returns
    -------
    dict or None
        Top-level Vega spec (if scope is empty)
        or group mark (if scope is non-empty)
        or None (if group mark at scope does not exist)

    Examples
    --------
    >>> spec = {
    ...     "marks": [
    ...         {"type": "group", "marks": [{"type": "symbol"}]},
    ...         {"type": "group", "marks": [{"type": "rect"}]},
    ...     ]
    ... }
    >>> get_group_mark_for_scope(spec, (1,))
    {'type': 'group', 'marks': [{'type': 'rect'}]}
    """
    current = vega_spec
    for wanted_index in scope:
        # Lazily enumerate only the group marks of the current level
        nested_groups = (
            mark for mark in current.get("marks", []) if mark.get("type") == "group"
        )
        selected = next(
            (
                mark
                for position, mark in enumerate(nested_groups)
                if position == wanted_index
            ),
            None,
        )
        if selected is None:
            return None
        current = selected
    return current
def get_datasets_for_scope(vega_spec: dict[str, Any], scope: Scope) -> list[str]:
    """
    Get the names of the datasets that are defined at a given scope.

    Besides the entries of the group's ``"data"`` list, the name of the
    group's facet dataset (``from.facet.name``), if any, is included last.

    Parameters
    ----------
    vega_spec : dict
        Top-level Vega specification
    scope : tuple of int
        Scope tuple. If empty, the names of top-level datasets are returned
        Otherwise, the names of the datasets defined in the nested group mark
        at the specified scope are returned.

    Returns
    -------
    list of str
        List of the names of the datasets defined at the specified scope

    Examples
    --------
    >>> spec = {
    ...     "data": [{"name": "data1"}],
    ...     "marks": [
    ...         {
    ...             "type": "group",
    ...             "data": [{"name": "data2"}],
    ...             "marks": [{"type": "symbol"}],
    ...         },
    ...         {
    ...             "type": "group",
    ...             "data": [
    ...                 {"name": "data3"},
    ...                 {"name": "data4"},
    ...             ],
    ...             "marks": [{"type": "rect"}],
    ...         },
    ...     ],
    ... }

    >>> get_datasets_for_scope(spec, ())
    ['data1']

    >>> get_datasets_for_scope(spec, (0,))
    ['data2']

    >>> get_datasets_for_scope(spec, (1,))
    ['data3', 'data4']

    Returns empty when no group mark exists at scope

    >>> get_datasets_for_scope(spec, (1, 3))
    []
    """
    # A missing group is treated like an empty one
    group = get_group_mark_for_scope(vega_spec, scope) or {}

    names = [entry["name"] for entry in group.get("data", [])]

    # A faceted group additionally exposes its facet dataset
    facet_name = group.get("from", {}).get("facet", {}).get("name", None)
    if facet_name:
        names.append(facet_name)
    return names
def get_definition_scope_for_data_reference(
    vega_spec: dict[str, Any], data_name: str, usage_scope: Scope
) -> Scope | None:
    """
    Return the scope that a dataset is defined at, for a given usage scope.

    The search starts at ``usage_scope`` itself and then moves outwards one
    level at a time up to the top level; the innermost match wins.

    Parameters
    ----------
    vega_spec: dict
        Top-level Vega specification
    data_name: str
        The name of a dataset reference
    usage_scope: tuple of int
        The scope that the dataset is referenced in

    Returns
    -------
    tuple of int
        The scope where the referenced dataset is defined,
        or None if no such dataset is found

    Examples
    --------
    >>> spec = {
    ...     "data": [{"name": "data1"}],
    ...     "marks": [
    ...         {
    ...             "type": "group",
    ...             "data": [{"name": "data2"}],
    ...             "marks": [
    ...                 {
    ...                     "type": "symbol",
    ...                     "encode": {
    ...                         "update": {
    ...                             "x": {"field": "x", "data": "data1"},
    ...                             "y": {"field": "y", "data": "data2"},
    ...                         }
    ...                     },
    ...                 }
    ...             ],
    ...         }
    ...     ],
    ... }

    data1 is referenced at scope [0] and defined at scope []

    >>> get_definition_scope_for_data_reference(spec, "data1", (0,))
    ()

    data2 is referenced at scope [0] and defined at scope [0]

    >>> get_definition_scope_for_data_reference(spec, "data2", (0,))
    (0,)

    data2 is not visible at scope [] (the top level),
    because it's defined in scope [0]

    >>> repr(get_definition_scope_for_data_reference(spec, "data2", ()))
    'None'
    """
    # Try the full usage scope first, then progressively shorter prefixes
    for depth in range(len(usage_scope), -1, -1):
        candidate_scope = usage_scope[:depth]
        if data_name in get_datasets_for_scope(vega_spec, candidate_scope):
            return candidate_scope
    return None
def get_facet_mapping(group: dict[str, Any], scope: Scope = ()) -> FacetMapping:
    """
    Create mapping from facet definitions to source datasets.

    Every nested group mark below ``scope`` is visited recursively. A group
    whose ``from.facet`` has both a ``name`` and a ``data`` entry contributes
    one mapping entry, provided the source dataset can be resolved from the
    scope that contains the group.

    Parameters
    ----------
    group : dict
        Top-level Vega spec or nested group mark
    scope : tuple of int
        Scope of the group dictionary within a top-level Vega spec

    Returns
    -------
    dict
        Dictionary from (facet_name, facet_scope) to (dataset_name, dataset_scope)

    Examples
    --------
    >>> spec = {
    ...     "data": [{"name": "data1"}],
    ...     "marks": [
    ...         {
    ...             "type": "group",
    ...             "from": {
    ...                 "facet": {
    ...                     "name": "facet1",
    ...                     "data": "data1",
    ...                     "groupby": ["colA"],
    ...                 }
    ...             },
    ...         }
    ...     ],
    ... }
    >>> get_facet_mapping(spec)
    {('facet1', (0,)): ('data1', ())}
    """
    mapping = {}
    parent = get_group_mark_for_scope(group, scope) or {}

    # Scope indices count group marks only, so enumerate just those
    nested_groups = (
        mark for mark in parent.get("marks", []) if mark.get("type", None) == "group"
    )
    for group_index, mark in enumerate(nested_groups):
        group_scope = (*scope, group_index)

        facet = mark.get("from", {}).get("facet", None)
        if facet is not None:
            facet_name = facet.get("name", None)
            facet_data = facet.get("data", None)
            if facet_name is not None and facet_data is not None:
                # The facet's source is referenced from the enclosing scope
                definition_scope = get_definition_scope_for_data_reference(
                    group, facet_data, scope
                )
                if definition_scope is not None:
                    mapping[facet_name, group_scope] = (facet_data, definition_scope)

        # Descend into the nested group
        mapping.update(get_facet_mapping(group, scope=group_scope))
    return mapping
def get_from_facet_mapping(
    scoped_dataset: tuple[str, Scope], facet_mapping: FacetMapping
) -> tuple[str, Scope]:
    """
    Apply facet mapping to a scoped dataset.

    The mapping is followed repeatedly until the current scoped dataset is
    no longer a key of ``facet_mapping``.

    Parameters
    ----------
    scoped_dataset : (str, tuple of int)
        A dataset name and scope tuple
    facet_mapping : dict from (str, tuple of int) to (str, tuple of int)
        The facet mapping produced by get_facet_mapping

    Returns
    -------
    (str, tuple of int)
        Dataset name and scope tuple that has been mapped as many times as possible

    Examples
    --------
    Facet mapping as produced by get_facet_mapping

    >>> facet_mapping = {
    ...     ("facet1", (0,)): ("data1", ()),
    ...     ("facet2", (0, 1)): ("facet1", (0,)),
    ... }
    >>> get_from_facet_mapping(("facet2", (0, 1)), facet_mapping)
    ('data1', ())
    """
    resolved = scoped_dataset
    while True:
        try:
            resolved = facet_mapping[resolved]
        except KeyError:
            # Not a facet (any more): this is the underlying dataset
            return resolved
def get_datasets_for_view_names(
    group: dict[str, Any],
    vl_chart_names: list[str],
    facet_mapping: FacetMapping,
    scope: Scope = (),
) -> dict[str, tuple[str, Scope]]:
    """
    Get the Vega datasets that correspond to the provided Altair view names.

    Two kinds of marks are recognised at each scope:

    - a mark named ``"<view>_cell"``: the view's data is the mark's facet
      source dataset (``from.facet.data``);
    - a non-group mark whose name starts with a view name and ends with
      ``"_marks"``: the view's data is the mark's ``from.data`` dataset.

    Group marks are searched recursively; results found in nested groups
    never override an entry already found at an outer scope.

    Parameters
    ----------
    group : dict
        Top-level Vega spec or nested group mark
    vl_chart_names : list of str
        List of the Vega-Lite view (chart) names to look up
    facet_mapping : dict from (str, tuple of int) to (str, tuple of int)
        The facet mapping produced by get_facet_mapping
    scope : tuple of int
        Scope of the group dictionary within a top-level Vega spec

    Returns
    -------
    dict from str to (str, tuple of int)
        Dict from Altair view names to scoped datasets. Views for which no
        dataset could be determined are absent from the result.
    """
    datasets = {}
    group_index = 0
    mark_group = get_group_mark_for_scope(group, scope) or {}
    for mark in mark_group.get("marks", []):
        name = mark.get("name", "")

        # Faceted views: the cell group is named "<view>_cell".
        for vl_chart_name in vl_chart_names:
            if name == f"{vl_chart_name}_cell":
                # Default to an empty dict so that a cell mark without a facet
                # definition is skipped instead of raising AttributeError on
                # ``None.get``.
                data_name = mark.get("from", {}).get("facet", {}).get("data", None)
                if data_name is not None:
                    datasets[vl_chart_name] = get_from_facet_mapping(
                        (data_name, scope), facet_mapping
                    )
                break

        if mark.get("type", "") == "group":
            group_data_names = get_datasets_for_view_names(
                group, vl_chart_names, facet_mapping, scope=(*scope, group_index)
            )
            for k, v in group_data_names.items():
                datasets.setdefault(k, v)
            group_index += 1
        elif name.endswith("_marks"):
            # Several view names can be prefixes of one mark name (e.g. both
            # "view_1" and "view_10" prefix "view_10_marks"). Taking the first
            # prefix match would attribute the mark to the wrong view, so the
            # longest (most specific) matching view name is used.
            candidates = [
                vl_chart_name
                for vl_chart_name in vl_chart_names
                if name.startswith(vl_chart_name)
            ]
            if candidates:
                vl_chart_name = max(candidates, key=len)
                data_name = mark.get("from", {}).get("data", None)
                scoped_data = get_definition_scope_for_data_reference(
                    group, data_name, scope
                )
                if scoped_data is not None:
                    datasets[vl_chart_name] = get_from_facet_mapping(
                        (data_name, scoped_data), facet_mapping
                    )
    return datasets

View File

@@ -0,0 +1,304 @@
from __future__ import annotations
import uuid
from importlib.metadata import version as importlib_version
from typing import TYPE_CHECKING, Any, Callable, Final, TypedDict, Union, overload
from weakref import WeakValueDictionary
from narwhals.stable.v1.dependencies import is_into_dataframe
from packaging.version import Version
from altair.utils._importers import import_vegafusion
from altair.utils.core import DataFrameLike
from altair.utils.data import (
DataType,
MaxRowsError,
SupportsGeoInterface,
ToValuesReturnType,
)
from altair.vegalite.data import default_data_transformer
if TYPE_CHECKING:
import sys
from collections.abc import MutableMapping
from narwhals.stable.v1.typing import IntoDataFrame
from vegafusion.runtime import ChartState
if sys.version_info >= (3, 13):
from typing import TypeIs
else:
from typing_extensions import TypeIs
# Temporary storage for dataframes that have been extracted
# from charts by the vegafusion data transformer. Use a WeakValueDictionary
# rather than a dict so that the Python interpreter is free to garbage
# collect the stored DataFrames.
extracted_inline_tables: MutableMapping[str, DataFrameLike] = WeakValueDictionary()

# Special URL prefix that VegaFusion uses to denote that a
# dataset in a Vega spec corresponds to an entry in the `inline_datasets`
# kwarg of vf.runtime.pre_transform_spec().
VEGAFUSION_PREFIX: Final = "vegafusion+dataset://"

# Version of the installed "vegafusion" distribution, or None when it is not
# installed. importlib.metadata signals a missing distribution with
# PackageNotFoundError, which is a subclass of ImportError.
try:
    VEGAFUSION_VERSION: Version | None = Version(importlib_version("vegafusion"))
except ImportError:
    VEGAFUSION_VERSION = None

# The support check is chosen once, at import time, based on the installed
# VegaFusion version.
if VEGAFUSION_VERSION and Version("2.0.0a0") <= VEGAFUSION_VERSION:

    def is_supported_by_vf(data: Any) -> TypeIs[DataFrameLike]:
        """Return True if ``data`` is a DataFrameLike or a narwhals-compatible dataframe."""
        # Test whether VegaFusion supports the data type
        # VegaFusion v2 support narwhals-compatible DataFrames
        return isinstance(data, DataFrameLike) or is_into_dataframe(data)

else:

    def is_supported_by_vf(data: Any) -> TypeIs[DataFrameLike]:
        """Return True if ``data`` is a DataFrameLike."""
        return isinstance(data, DataFrameLike)
class _ToVegaFusionReturnUrlDict(TypedDict):
    # Shape of the dict that ``vegafusion_data_transformer`` returns for data
    # it stores as an inline table: ``{"url": VEGAFUSION_PREFIX + table_name}``.
    url: str


# Everything ``vegafusion_data_transformer`` can return for actual data:
# either the URL dict above or whatever the default data transformer returns.
_VegaFusionReturnType = Union[_ToVegaFusionReturnUrlDict, ToValuesReturnType]
@overload
def vegafusion_data_transformer(
    data: None = ..., max_rows: int = ...
) -> Callable[..., Any]: ...
@overload
def vegafusion_data_transformer(
    data: DataFrameLike, max_rows: int = ...
) -> ToValuesReturnType: ...
@overload
def vegafusion_data_transformer(
    data: dict | IntoDataFrame | SupportsGeoInterface, max_rows: int = ...
) -> _VegaFusionReturnType: ...
def vegafusion_data_transformer(
    data: DataType | None = None, max_rows: int = 100000
) -> Callable[..., Any] | _VegaFusionReturnType:
    """
    VegaFusion Data Transformer.

    Called without data, the transformer itself is returned. Data that
    VegaFusion supports (and that is not a geo-interface object) is stored in
    ``extracted_inline_tables`` under a freshly generated table name and
    replaced by a URL dict pointing at that name. All other data is handed to
    the default data transformer.
    """
    if data is None:
        return vegafusion_data_transformer

    if not is_supported_by_vf(data) or isinstance(data, SupportsGeoInterface):
        # Geo interface objects (e.g. a geopandas GeoDataFrame) and data types
        # that are not recognized go through the default transformer
        return default_data_transformer(data)

    # uuid4 contains dashes; the table name uses underscores instead
    table_name = "table_" + str(uuid.uuid4()).replace("-", "_")
    extracted_inline_tables[table_name] = data
    return {"url": VEGAFUSION_PREFIX + table_name}
def get_inline_table_names(vega_spec: dict[str, Any]) -> set[str]:
    """
    Get a set of the inline datasets names in the provided Vega spec.

    Inline datasets are encoded as URLs that start with ``VEGAFUSION_PREFIX``
    (``vegafusion+dataset://``); the dataset name is the remainder of the URL.
    Datasets of nested marks are included as well.

    Parameters
    ----------
    vega_spec: dict
        A Vega specification dict

    Returns
    -------
    set of str
        Set of the names of the inline datasets that are referenced
        in the specification.

    Examples
    --------
    >>> spec = {
    ...     "data": [
    ...         {"name": "foo", "url": "https://path/to/file.csv"},
    ...         {"name": "bar", "url": "vegafusion+dataset://inline_dataset_123"},
    ...     ]
    ... }
    >>> get_inline_table_names(spec)
    {'inline_dataset_123'}
    """
    prefix_length = len(VEGAFUSION_PREFIX)

    # Datasets declared directly at this level
    dataset_urls = (dataset.get("url", "") for dataset in vega_spec.get("data", []))
    table_names = {
        url[prefix_length:] for url in dataset_urls if url.startswith(VEGAFUSION_PREFIX)
    }

    # Child marks may declare datasets of their own
    for mark in vega_spec.get("marks", []):
        table_names |= get_inline_table_names(mark)
    return table_names
def get_inline_tables(vega_spec: dict[str, Any]) -> dict[str, DataFrameLike]:
    """
    Get the inline tables referenced by a Vega specification.

    Note: This function should only be called on a Vega spec that corresponds
    to a chart that was processed by the vegafusion_data_transformer.
    Furthermore, this function may only be called once per spec because
    the returned dataframes are removed from ``extracted_inline_tables``.

    Parameters
    ----------
    vega_spec: dict
        A Vega specification dict

    Returns
    -------
    dict from str to dataframe
        dict from inline dataset name to dataframe object
    """
    referenced = get_inline_table_names(vega_spec)

    # Keep only names that are still stored: this drops named datasets that
    # were provided by the user and dataframes that have been deleted.
    still_stored = {name for name in referenced if name in extracted_inline_tables}

    tables = {}
    for name in still_stored:
        tables[name] = extracted_inline_tables.pop(name)
    return tables
def compile_to_vegafusion_chart_state(
    vegalite_spec: dict[str, Any], local_tz: str
) -> ChartState:
    """
    Compile a Vega-Lite spec to a VegaFusion ChartState.

    Note: This function should only be called on a Vega-Lite spec
    that was generated with the "vegafusion" data transformer enabled.
    In particular, this spec may contain references to extracted datasets
    using ``VEGAFUSION_PREFIX`` (``vegafusion+dataset://``) URLs.

    Parameters
    ----------
    vegalite_spec: dict
        A Vega-Lite spec that was generated from an Altair chart with
        the "vegafusion" data transformer enabled
    local_tz: str
        Local timezone name (e.g. 'America/New_York')

    Returns
    -------
    ChartState
        A VegaFusion ChartState object

    Raises
    ------
    ValueError
        If no Vega-Lite compiler plugin is active.
    """
    # Local import to avoid circular ImportError
    from altair import data_transformers, vegalite_compilers

    vf = import_vegafusion()

    # Vega-Lite -> Vega
    to_vega = vegalite_compilers.get()
    if to_vega is None:
        msg = "No active vega-lite compiler plugin found"
        raise ValueError(msg)
    vega_spec = to_vega(vegalite_spec)

    # DataFrames that the spec references by inline-dataset URL
    inline_tables = get_inline_tables(vega_spec)

    # Let VegaFusion pre-evaluate the transforms, honouring the row limit
    # configured on the active data transformer
    max_rows = data_transformers.options.get("max_rows", None)
    state = vf.runtime.new_chart_state(
        vega_spec,
        local_tz=local_tz,
        inline_datasets=inline_tables,
        row_limit=max_rows,
    )

    # Inspect the warnings for an exceeded row limit
    handle_row_limit_exceeded(max_rows, state.get_warnings())
    return state
def compile_with_vegafusion(vegalite_spec: dict[str, Any]) -> dict[str, Any]:
    """
    Turn a Vega-Lite spec into a Vega spec pre-transformed by VegaFusion.

    Note: Only call this on a Vega-Lite spec that was generated with the
    "vegafusion" data transformer enabled. Such a spec may reference
    extracted datasets through URLs that start with ``VEGAFUSION_PREFIX``.

    Parameters
    ----------
    vegalite_spec: dict
        A Vega-Lite spec that was generated from an Altair chart with
        the "vegafusion" data transformer enabled

    Returns
    -------
    dict
        A Vega spec that has been pre-transformed by VegaFusion

    Raises
    ------
    ValueError
        If no Vega-Lite compiler plugin is active.
    """
    # Imported here rather than at module level to avoid a circular ImportError
    from altair import data_transformers, vegalite_compilers

    vf = import_vegafusion()

    # Step 1: Vega-Lite -> Vega, using the active compiler plugin
    to_vega = vegalite_compilers.get()
    if to_vega is None:
        msg = "No active vega-lite compiler plugin found"
        raise ValueError(msg)
    vega_spec = to_vega(vegalite_spec)

    # Step 2: gather the inline tables the compiled spec refers to
    inline_datasets = get_inline_tables(vega_spec)

    # Step 3: let VegaFusion pre-evaluate the transforms in the Vega spec
    max_rows = data_transformers.options.get("max_rows", None)
    result = vf.runtime.pre_transform_spec(
        vega_spec,
        vf.get_local_tz(),
        inline_datasets=inline_datasets,
        row_limit=max_rows,
    )
    transformed_vega_spec, vf_warnings = result

    # Step 4: a row-limit warning is turned into a MaxRowsError
    handle_row_limit_exceeded(max_rows, vf_warnings)
    return transformed_vega_spec
def handle_row_limit_exceeded(row_limit: int, warnings: list):
    """
    Raise MaxRowsError when a warning reports that the row limit was exceeded.

    Parameters
    ----------
    row_limit
        The limit that was in effect; only used in the error message.
    warnings
        Warning objects supporting ``.get("type")``. Scanning stops at the
        first one whose type is ``"RowLimitExceeded"``.

    Raises
    ------
    MaxRowsError
        If at least one warning has type ``"RowLimitExceeded"``.
    """
    limit_hit = any(entry.get("type") == "RowLimitExceeded" for entry in warnings)
    if not limit_hit:
        return
    msg = (
        "The number of dataset rows after filtering and aggregation exceeds\n"
        f"the current limit of {row_limit}. Try adding an aggregation to reduce\n"
        "the size of the dataset that must be loaded into the browser. Or, disable\n"
        "the limit by calling alt.data_transformers.disable_max_rows(). Note that\n"
        "disabling this limit may cause the browser to freeze or crash."
    )
    raise MaxRowsError(msg)
def using_vegafusion() -> bool:
    """Report whether "vegafusion" is the active data transformer."""
    # Imported here rather than at module level to avoid a circular ImportError
    from altair import data_transformers

    active_name = data_transformers.active
    return active_name == "vegafusion"

View File

@@ -0,0 +1,12 @@
from typing import Any, Callable
from altair.utils import PluginRegistry
# ==============================================================================
# Vega-Lite to Vega compiler registry
# ==============================================================================
# Signature of a compiler plugin: called with one spec dict, returns a dict.
VegaLiteCompilerType = Callable[[dict[str, Any]], dict[str, Any]]


class VegaLiteCompilerRegistry(PluginRegistry[VegaLiteCompilerType, dict[str, Any]]):
    """Plugin registry for Vega-Lite to Vega compiler callables."""

View File

@@ -0,0 +1,981 @@
"""Utility routines."""
from __future__ import annotations
import itertools
import json
import re
import sys
import traceback
import warnings
from collections.abc import Iterator, Mapping, MutableMapping
from copy import deepcopy
from itertools import groupby
from operator import itemgetter
from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast, overload
import jsonschema
import narwhals.stable.v1 as nw
from narwhals.stable.v1.dependencies import is_pandas_dataframe, is_polars_dataframe
from narwhals.stable.v1.typing import IntoDataFrame
from altair.utils.schemapi import SchemaBase, SchemaLike, Undefined
if sys.version_info >= (3, 12):
from typing import Protocol, TypeAliasType, runtime_checkable
else:
from typing_extensions import Protocol, TypeAliasType, runtime_checkable
if sys.version_info >= (3, 10):
from typing import Concatenate, ParamSpec
else:
from typing_extensions import Concatenate, ParamSpec
if TYPE_CHECKING:
import typing as t
import pandas as pd
from narwhals.stable.v1.typing import IntoExpr
from altair.utils._dfi_types import DataFrame as DfiDataFrame
from altair.vegalite.v5.schema._typing import StandardType_T as InferredVegaLiteType
# Type variable bound to `IntoDataFrame`.
TIntoDataFrame = TypeVar("TIntoDataFrame", bound=IntoDataFrame)
# Generic helpers for the decorator aliases below: `T` is the first positional
# argument of a method alias, `P` the parameter spec and `R` the return type.
T = TypeVar("T")
P = ParamSpec("P")
R = TypeVar("R")
# A callable with arbitrary parameters returning `R`.
WrapsFunc = TypeAliasType("WrapsFunc", Callable[..., R], type_params=(R,))
# A callable with parameters `P` returning `R`.
WrappedFunc = TypeAliasType("WrappedFunc", Callable[P, R], type_params=(P, R))
# NOTE: Requires stringized form to avoid `< (3, 11)` issues
# See: https://github.com/vega/altair/actions/runs/10667859416/job/29567290871?pr=3565
WrapsMethod = TypeAliasType(
    "WrapsMethod", "Callable[Concatenate[T, ...], R]", type_params=(T, R)
)
# A callable taking `T` first, then parameters `P`, returning `R`.
WrappedMethod = TypeAliasType(
    "WrappedMethod", Callable[Concatenate[T, P], R], type_params=(T, P, R)
)
@runtime_checkable
class DataFrameLike(Protocol):
    """
    Structural type for objects that expose a ``__dataframe__`` method.

    The class is runtime-checkable, so ``isinstance(obj, DataFrameLike)``
    can be used to test for the presence of that method.
    """

    def __dataframe__(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ) -> DfiDataFrame: ...
# Maps each full type name to its one-letter shorthand code.
TYPECODE_MAP = {
    "ordinal": "O",
    "nominal": "N",
    "quantitative": "Q",
    "temporal": "T",
    "geojson": "G",
}
# Reverse lookup: one-letter shorthand code -> full type name.
INV_TYPECODE_MAP = {v: k for k, v in TYPECODE_MAP.items()}
# aggregates from vega-lite version 4.6.0
# These names are joined into the "aggregate" and "window_op" regex
# alternations of SHORTHAND_UNITS.
AGGREGATES = [
    "argmax",
    "argmin",
    "average",
    "count",
    "distinct",
    "max",
    "mean",
    "median",
    "min",
    "missing",
    "product",
    "q1",
    "q3",
    "ci0",
    "ci1",
    "stderr",
    "stdev",
    "stdevp",
    "sum",
    "valid",
    "values",
    "variance",
    "variancep",
    "exponential",
    "exponentialb",
]
# window aggregates from vega-lite version 4.6.0
# Appended to AGGREGATES to form the "window_op" alternation of SHORTHAND_UNITS.
WINDOW_AGGREGATES = [
    "row_number",
    "rank",
    "dense_rank",
    "percent_rank",
    "cume_dist",
    "ntile",
    "lag",
    "lead",
    "first_value",
    "last_value",
    "nth_value",
]
# timeUnits from vega-lite version 4.17.0
# These names are joined into the "timeUnit" regex alternation of
# SHORTHAND_UNITS. Local-time units come first, then their "utc"-prefixed forms.
TIMEUNITS = [
    "year",
    "quarter",
    "month",
    "week",
    "day",
    "dayofyear",
    "date",
    "hours",
    "minutes",
    "seconds",
    "milliseconds",
    "yearquarter",
    "yearquartermonth",
    "yearmonth",
    "yearmonthdate",
    "yearmonthdatehours",
    "yearmonthdatehoursminutes",
    "yearmonthdatehoursminutesseconds",
    "yearweek",
    "yearweekday",
    "yearweekdayhours",
    "yearweekdayhoursminutes",
    "yearweekdayhoursminutesseconds",
    "yeardayofyear",
    "quartermonth",
    "monthdate",
    "monthdatehours",
    "monthdatehoursminutes",
    "monthdatehoursminutesseconds",
    "weekday",
    # NOTE(review): "weeksdayhours" looks like a misspelling of "weekdayhours"
    # (listed right after it) and has no "utc" counterpart below. Confirm
    # against the Vega-Lite schema before removing it.
    "weeksdayhours",
    "weekdayhours",
    "weekdayhoursminutes",
    "weekdayhoursminutesseconds",
    "dayhours",
    "dayhoursminutes",
    "dayhoursminutesseconds",
    "hoursminutes",
    "hoursminutesseconds",
    "minutesseconds",
    "secondsmilliseconds",
    "utcyear",
    "utcquarter",
    "utcmonth",
    "utcweek",
    "utcday",
    "utcdayofyear",
    "utcdate",
    "utchours",
    "utcminutes",
    "utcseconds",
    "utcmilliseconds",
    "utcyearquarter",
    "utcyearquartermonth",
    "utcyearmonth",
    "utcyearmonthdate",
    "utcyearmonthdatehours",
    "utcyearmonthdatehoursminutes",
    "utcyearmonthdatehoursminutesseconds",
    "utcyearweek",
    "utcyearweekday",
    "utcyearweekdayhours",
    "utcyearweekdayhoursminutes",
    "utcyearweekdayhoursminutesseconds",
    "utcyeardayofyear",
    "utcquartermonth",
    "utcmonthdate",
    "utcmonthdatehours",
    "utcmonthdatehoursminutes",
    "utcmonthdatehoursminutesseconds",
    "utcweekday",
    "utcweekdayhours",
    "utcweekdayhoursminutes",
    "utcweekdayhoursminutesseconds",
    "utcdayhours",
    "utcdayhoursminutes",
    "utcdayhoursminutesseconds",
    "utchoursminutes",
    "utchoursminutesseconds",
    "utcminutesseconds",
    "utcsecondsmilliseconds",
]
# Every accepted spelling of a type: the full names first, then the
# one-letter codes.
VALID_TYPECODES = list(itertools.chain(iter(TYPECODE_MAP), iter(INV_TYPECODE_MAP)))
# Regex fragments with named groups. The keys are the placeholder names that
# `parse_shorthand` fills into its pattern templates via `str.format`.
SHORTHAND_UNITS = {
    "field": "(?P<field>.*)",
    "type": "(?P<type>{})".format("|".join(VALID_TYPECODES)),
    "agg_count": "(?P<aggregate>count)",
    "op_count": "(?P<op>count)",
    "aggregate": "(?P<aggregate>{})".format("|".join(AGGREGATES)),
    "window_op": "(?P<op>{})".format("|".join(AGGREGATES + WINDOW_AGGREGATES)),
    "timeUnit": "(?P<timeUnit>{})".format("|".join(TIMEUNITS)),
}
# Immutable set of the attribute names "field", "aggregate", "type" and
# "timeUnit".
SHORTHAND_KEYS: frozenset[Literal["field", "aggregate", "type", "timeUnit"]] = (
    frozenset(("field", "aggregate", "type", "timeUnit"))
)
def infer_vegalite_type_for_pandas(
    data: Any,
) -> InferredVegaLiteType | tuple[InferredVegaLiteType, list[Any]]:
    """
    Infer the vega typecode of an array-like pandas input.

    The result is one of 'ordinal', 'nominal', 'quantitative' or 'temporal'.

    Parameters
    ----------
    data: Any
        Array-like input, passed to ``pandas.api.types.infer_dtype``.

    Returns
    -------
    str or tuple
        The type name. For an ordered categorical the result is the tuple
        ``("ordinal", categories)`` where ``categories`` is the ordered list
        of category values. Unrecognised kinds emit a warning and fall back
        to ``"nominal"``.
    """
    # This is safe to import here, as this function is only called on pandas input.
    from pandas.api.types import infer_dtype

    quantitative_kinds = (
        "floating",
        "mixed-integer-float",
        "integer",
        "mixed-integer",
        "complex",
    )
    nominal_kinds = ("string", "bytes", "categorical", "boolean", "mixed", "unicode")
    temporal_kinds = (
        "datetime",
        "datetime64",
        "timedelta",
        "timedelta64",
        "date",
        "time",
        "period",
    )

    typ = infer_dtype(data, skipna=False)
    if typ in quantitative_kinds:
        return "quantitative"
    # Ordered categoricals must be handled before the generic nominal check,
    # which also accepts "categorical".
    if typ == "categorical" and hasattr(data, "cat") and data.cat.ordered:
        return ("ordinal", data.cat.categories.tolist())
    if typ in nominal_kinds:
        return "nominal"
    if typ in temporal_kinds:
        return "temporal"
    warnings.warn(
        f"I don't know how to infer vegalite type from '{typ}'. "
        "Defaulting to nominal.",
        stacklevel=1,
    )
    return "nominal"
def merge_props_geom(feat: dict[str, Any]) -> dict[str, Any]:
    """
    Fold a feature's 'type' and 'geometry' into its properties mapping.

    * Existing 'type' and 'geometry' entries in the properties are overwritten.
    * The properties mapping is updated in place and returned. When it is
      missing or ``None``, a new dict holding only 'type' and 'geometry' is
      returned instead.
    """
    geom = {"type": feat["type"], "geometry": feat["geometry"]}
    try:
        properties = feat["properties"]
        properties.update(geom)
    except (AttributeError, KeyError):
        # AttributeError: 'properties' is None
        # KeyError: 'properties' is absent
        return geom
    return properties
def sanitize_geo_interface(geo: t.MutableMapping[Any, Any]) -> dict[str, Any]:
    """
    Prepare a geo_interface mapping for serialization.

    * Work on a deep copy, so the input is left untouched
    * Turn values whose type name starts with ``_Array`` or ``array`` into lists
    * Turn (nested) tuples into lists via a JSON round trip
    * Merge properties with geometry for each feature

    A FeatureCollection yields its list of merged features, a Feature yields
    the merged feature, and anything else is wrapped as the geometry of a
    new Feature.
    """
    geo = deepcopy(geo)

    # convert type _Array or array to list
    array_prefixes = ("_Array", "array")
    for key in geo:
        value = geo[key]
        if str(type(value).__name__).startswith(array_prefixes):
            geo[key] = value.tolist()

    # convert (nested) tuples to lists
    geo_dct: dict = json.loads(json.dumps(geo))

    # sanitize features
    kind = geo_dct["type"]
    if kind == "FeatureCollection":
        features = geo_dct["features"]
        if len(features) > 0:
            features[:] = [merge_props_geom(feat) for feat in features]
        return features
    if kind == "Feature":
        return merge_props_geom(geo_dct)
    return {"type": "Feature", "geometry": geo_dct}
def numpy_is_subtype(dtype: Any, subtype: Any) -> bool:
    """
    Return ``np.issubdtype(dtype, subtype)``, or ``False`` if it cannot be checked.

    A ``NotImplementedError`` or ``TypeError`` raised by the check is treated
    as "not a subtype".
    """
    # This is only called on `numpy` inputs, so it's safe to import it here.
    import numpy as np

    try:
        result = np.issubdtype(dtype, subtype)
    except (NotImplementedError, TypeError):
        return False
    return result
def sanitize_pandas_dataframe(df: pd.DataFrame) -> pd.DataFrame:  # noqa: C901
    """
    Sanitize a DataFrame to prepare it for serialization.

    * Make a copy
    * Convert RangeIndex columns to strings
    * Raise ValueError if column names are not strings
    * Raise ValueError if it has a hierarchical index.
    * Convert categoricals to strings.
    * Convert np.bool_ dtypes to Python bool objects
    * Convert np.int dtypes to Python int objects
    * Convert floats to objects and replace NaNs/infs with None.
    * Convert DateTime dtypes into appropriate string representations
    * Convert Nullable integers to objects and replace NaN with None
    * Convert Nullable boolean to objects and replace NaN with None
    * convert dedicated string column to objects and replace NaN with None
    * Raise a ValueError for TimeDelta dtypes

    Parameters
    ----------
    df : pd.DataFrame
        The frame to sanitize. It is copied first, so the caller's frame is
        not modified.

    Returns
    -------
    pd.DataFrame
        The sanitized copy.

    Raises
    ------
    ValueError
        If a column name is not a string, if the index or the columns are a
        ``MultiIndex``, or if a column has a timedelta dtype.
    """
    # This is safe to import here, as this function is only called on pandas input.
    # NumPy is a required dependency of pandas so is also safe to import.
    import numpy as np
    import pandas as pd

    df = df.copy()

    if isinstance(df.columns, pd.RangeIndex):
        df.columns = df.columns.astype(str)

    for col_name in df.columns:
        if not isinstance(col_name, str):
            msg = (
                f"Dataframe contains invalid column name: {col_name!r}. "
                "Column names must be strings"
            )
            raise ValueError(msg)

    if isinstance(df.index, pd.MultiIndex):
        msg = "Hierarchical indices not supported"
        raise ValueError(msg)
    if isinstance(df.columns, pd.MultiIndex):
        msg = "Hierarchical indices not supported"
        raise ValueError(msg)

    def to_list_if_array(val):
        # Only ndarray values are converted; everything else passes through.
        if isinstance(val, np.ndarray):
            return val.tolist()
        else:
            return val

    # The branches below are checked in order, and the name-based checks come
    # before the numpy subtype checks.
    for dtype_item in df.dtypes.items():
        # We know that the column names are strings from the isinstance check
        # further above but mypy thinks it is of type Hashable and therefore does not
        # let us assign it to the col_name variable which is already of type str.
        col_name = cast(str, dtype_item[0])
        dtype = dtype_item[1]
        dtype_name = str(dtype)
        if dtype_name == "category":
            # Work around bug in to_json for categorical types in older versions
            # of pandas as they do not properly convert NaN values to null in to_json.
            # We can probably remove this part once we require pandas >= 1.0
            col = df[col_name].astype(object)
            df[col_name] = col.where(col.notnull(), None)
        elif dtype_name == "string":
            # dedicated string datatype (since 1.0)
            # https://pandas.pydata.org/pandas-docs/version/1.0.0/whatsnew/v1.0.0.html#dedicated-string-data-type
            col = df[col_name].astype(object)
            df[col_name] = col.where(col.notnull(), None)
        elif dtype_name == "bool":
            # convert numpy bools to objects; np.bool is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif dtype_name == "boolean":
            # dedicated boolean datatype (since 1.0)
            # https://pandas.io/docs/user_guide/boolean.html
            col = df[col_name].astype(object)
            df[col_name] = col.where(col.notnull(), None)
        elif dtype_name.startswith(("datetime", "timestamp")):
            # Convert datetimes to strings. This needs to be a full ISO string
            # with time, which is why we cannot use ``col.astype(str)``.
            # This is because Javascript parses date-only times in UTC, but
            # parses full ISO-8601 dates as local time, and dates in Vega and
            # Vega-Lite are displayed in local time by default.
            # (see https://github.com/vega/altair/issues/1027)
            # Values whose isoformat() is the string "NaT" are replaced by "".
            df[col_name] = (
                df[col_name].apply(lambda x: x.isoformat()).replace("NaT", "")
            )
        elif dtype_name.startswith("timedelta"):
            msg = (
                f'Field "{col_name}" has type "{dtype}" which is '
                "not supported by Altair. Please convert to "
                "either a timestamp or a numerical value."
                ""
            )
            raise ValueError(msg)
        elif dtype_name.startswith("geometry"):
            # geopandas >=0.6.1 uses the dtype geometry. Continue here
            # otherwise it will give an error on np.issubdtype(dtype, np.integer)
            continue
        elif (
            dtype_name
            in {
                "Int8",
                "Int16",
                "Int32",
                "Int64",
                "UInt8",
                "UInt16",
                "UInt32",
                "UInt64",
                "Float32",
                "Float64",
            }
        ):  # nullable integer datatypes (since 0.24.0) and nullable float datatypes (since 1.2.0)
            # https://pandas.pydata.org/pandas-docs/version/0.25/whatsnew/v0.24.0.html#optional-integer-na-support
            col = df[col_name].astype(object)
            df[col_name] = col.where(col.notnull(), None)
        elif numpy_is_subtype(dtype, np.integer):
            # convert integers to objects; np.int is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif numpy_is_subtype(dtype, np.floating):
            # For floats, convert to Python float: np.float is not JSON serializable
            # Also convert NaN/inf values to null, as they are not JSON serializable
            col = df[col_name]
            bad_values = col.isnull() | np.isinf(col)
            df[col_name] = col.astype(object).where(~bad_values, None)
        elif dtype == object:  # noqa: E721
            # Convert numpy arrays saved as objects to lists
            # Arrays are not JSON serializable
            col = df[col_name].astype(object).apply(to_list_if_array)
            df[col_name] = col.where(col.notnull(), None)
    return df
def sanitize_narwhals_dataframe(
    data: nw.DataFrame[TIntoDataFrame],
) -> nw.DataFrame[TIntoDataFrame]:
    """
    Sanitize narwhals.DataFrame for JSON serialization.

    Date and Datetime columns are rendered as ISO-style strings, all other
    columns are selected unchanged.

    Raises
    ------
    ValueError
        If a column has a Duration dtype.
    """
    dtypes_by_name = data.schema
    # See https://github.com/vega/altair/issues/1027 for why this is necessary.
    local_iso_fmt_string = "%Y-%m-%dT%H:%M:%S"
    is_polars = is_polars_dataframe(data.to_native())

    columns: list[IntoExpr] = []
    for name, dtype in dtypes_by_name.items():
        if dtype == nw.Date:
            expr = nw.col(name)
            if is_polars:
                # Polars doesn't allow formatting `Date` with time directives.
                # The date -> datetime cast is extremely fast compared with `to_string`
                expr = expr.cast(nw.Datetime)
            columns.append(expr.dt.to_string(local_iso_fmt_string))
        elif dtype == nw.Datetime:
            columns.append(nw.col(name).dt.to_string(f"{local_iso_fmt_string}%.f"))
        elif dtype == nw.Duration:
            msg = (
                f'Field "{name}" has type "{dtype}" which is '
                "not supported by Altair. Please convert to "
                "either a timestamp or a numerical value."
                ""
            )
            raise ValueError(msg)
        else:
            columns.append(name)
    return data.select(columns)
def to_eager_narwhals_dataframe(data: IntoDataFrame) -> nw.DataFrame[Any]:
    """
    Return `data` wrapped in a `narwhals.DataFrame`.

    When Narwhals only supports `data` at the "interchange" level, `data` is
    first converted to a PyArrow Table and that table is wrapped instead.
    """
    wrapped = nw.from_native(data, eager_or_interchange_only=True)
    if nw.get_level(wrapped) != "interchange":
        return wrapped

    # Narwhals' support for this class is only metadata-level, so go through
    # the interchange protocol to obtain a PyArrow Table.
    from altair.utils.data import arrow_table_from_dfi_dataframe

    pa_table = arrow_table_from_dfi_dataframe(data)  # type: ignore[arg-type]
    return nw.from_native(pa_table, eager_only=True)
def parse_shorthand(  # noqa: C901
    shorthand: dict[str, Any] | str,
    data: IntoDataFrame | None = None,
    parse_aggregates: bool = True,
    parse_window_ops: bool = False,
    parse_timeunits: bool = True,
    parse_types: bool = True,
) -> dict[str, Any]:
    """
    General tool to parse shorthand values.

    These are of the form:

    - "col_name"
    - "col_name:O"
    - "average(col_name)"
    - "average(col_name):O"

    Optionally, a dataframe may be supplied, from which the type
    will be inferred if not specified in the shorthand.

    Parameters
    ----------
    shorthand : dict or string
        The shorthand representation to be parsed. A dict is used as the
        attribute mapping directly (and is updated in place) instead of
        being parsed.
    data : DataFrame, optional
        If specified and of type DataFrame, then use these values to infer the
        column type if not provided by the shorthand.
    parse_aggregates : boolean
        If True (default), then parse aggregate functions within the shorthand.
    parse_window_ops : boolean
        If True then parse window operations within the shorthand (default:False)
    parse_timeunits : boolean
        If True (default), then parse timeUnits from within the shorthand
    parse_types : boolean
        If True (default), then parse typecodes within the shorthand

    Returns
    -------
    attrs : dict
        a dictionary of attributes extracted from the shorthand. An empty
        dict is returned for a falsy ``shorthand``.

    Raises
    ------
    ValueError
        If the parsed field still contains a colon that is not preceded by a
        backslash.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({"foo": ["A", "B", "A", "B"], "bar": [1, 2, 3, 4]})

    >>> parse_shorthand("name") == {"field": "name"}
    True

    >>> parse_shorthand("name:Q") == {"field": "name", "type": "quantitative"}
    True

    >>> parse_shorthand("average(col)") == {"aggregate": "average", "field": "col"}
    True

    >>> parse_shorthand("foo:O") == {"field": "foo", "type": "ordinal"}
    True

    >>> parse_shorthand("min(foo):Q") == {
    ...     "aggregate": "min",
    ...     "field": "foo",
    ...     "type": "quantitative",
    ... }
    True

    >>> parse_shorthand("month(col)") == {
    ...     "field": "col",
    ...     "timeUnit": "month",
    ...     "type": "temporal",
    ... }
    True

    >>> parse_shorthand("year(col):O") == {
    ...     "field": "col",
    ...     "timeUnit": "year",
    ...     "type": "ordinal",
    ... }
    True

    >>> parse_shorthand("foo", data) == {"field": "foo", "type": "nominal"}
    True

    >>> parse_shorthand("bar", data) == {"field": "bar", "type": "quantitative"}
    True

    >>> parse_shorthand("bar:O", data) == {"field": "bar", "type": "ordinal"}
    True

    >>> parse_shorthand("sum(bar)", data) == {
    ...     "aggregate": "sum",
    ...     "field": "bar",
    ...     "type": "quantitative",
    ... }
    True

    >>> parse_shorthand("count()", data) == {
    ...     "aggregate": "count",
    ...     "type": "quantitative",
    ... }
    True
    """
    from altair.utils.data import is_data_type

    if not shorthand:
        return {}

    # Pattern templates, most specific first. The first regex that matches
    # the whole shorthand wins, so the order matters.
    patterns = []

    if parse_aggregates:
        patterns.extend([r"{agg_count}\(\)"])
        patterns.extend([r"{aggregate}\({field}\)"])
    if parse_window_ops:
        patterns.extend([r"{op_count}\(\)"])
        patterns.extend([r"{window_op}\({field}\)"])
    if parse_timeunits:
        patterns.extend([r"{timeUnit}\({field}\)"])

    # Bare field name: the catch-all, tried last.
    patterns.extend([r"{field}"])

    if parse_types:
        # Each pattern is tried with a ":{type}" suffix first, then without.
        patterns = list(itertools.chain(*((p + ":{type}", p) for p in patterns)))

    # Lazily compiled; each regex must match the entire shorthand (\A ... \Z).
    regexps = (
        re.compile(r"\A" + p.format(**SHORTHAND_UNITS) + r"\Z", re.DOTALL)
        for p in patterns
    )

    # find matches depending on valid fields passed
    if isinstance(shorthand, dict):
        attrs = shorthand
    else:
        attrs = next(
            exp.match(shorthand).groupdict()  # type: ignore[union-attr]
            for exp in regexps
            if exp.match(shorthand) is not None
        )

    # Handle short form of the type expression
    if "type" in attrs:
        attrs["type"] = INV_TYPECODE_MAP.get(attrs["type"], attrs["type"])

    # counts are quantitative by default
    if attrs == {"aggregate": "count"}:
        attrs["type"] = "quantitative"

    # times are temporal by default
    if "timeUnit" in attrs and "type" not in attrs:
        attrs["type"] = "temporal"

    # if data is specified and type is not, infer type from data
    if "type" not in attrs and is_data_type(data):
        # Backslashes are dropped before the column lookup.
        unescaped_field = attrs["field"].replace("\\", "")
        data_nw = nw.from_native(data, eager_or_interchange_only=True)
        schema = data_nw.schema
        if unescaped_field in schema:
            column = data_nw[unescaped_field]
            if schema[unescaped_field] in {
                nw.Object,
                nw.Unknown,
            } and is_pandas_dataframe(data_nw.to_native()):
                attrs["type"] = infer_vegalite_type_for_pandas(column.to_native())
            else:
                attrs["type"] = infer_vegalite_type_for_narwhals(column)
            # A tuple result carries (type, sort order); split it up.
            if isinstance(attrs["type"], tuple):
                attrs["sort"] = attrs["type"][1]
                attrs["type"] = attrs["type"][0]

    # If an unescaped colon is still present, it's often due to an incorrect data type specification
    # but could also be due to using a column name with ":" in it.
    if (
        "field" in attrs
        and ":" in attrs["field"]
        and attrs["field"][attrs["field"].rfind(":") - 1] != "\\"
    ):
        raise ValueError(
            '"{}" '.format(attrs["field"].split(":")[-1])
            + "is not one of the valid encoding data types: {}.".format(
                ", ".join(TYPECODE_MAP.values())
            )
            + "\nFor more details, see https://altair-viz.github.io/user_guide/encodings/index.html#encoding-data-types. "
            + "If you are trying to use a column name that contains a colon, "
            + 'prefix it with a backslash; for example "column\\:name" instead of "column:name".'
        )
    return attrs
def infer_vegalite_type_for_narwhals(
    column: nw.Series,
) -> InferredVegaLiteType | tuple[InferredVegaLiteType, list]:
    """
    Infer the vega type name of a narwhals Series from its dtype.

    Returns
    -------
    str or tuple
        ``"nominal"``, ``"quantitative"`` or ``"temporal"``. For an ordered
        categorical with at least one category the result is the tuple
        ``("ordinal", categories)``.

    Raises
    ------
    ValueError
        If the dtype matches none of the handled kinds.
    """
    dtype = column.dtype
    if nw.is_ordered_categorical(column):
        categories = column.cat.get_categories()
        if not categories.is_empty():
            return "ordinal", categories.to_list()
    if dtype == nw.String or dtype == nw.Categorical or dtype == nw.Boolean:  # noqa: PLR1714
        return "nominal"
    if dtype.is_numeric():
        return "quantitative"
    # We use `== nw.Datetime` to check for any kind of Datetime, regardless of time
    # unit and time zone. Prefer this over `dtype in {nw.Datetime, nw.Date}`,
    # see https://narwhals-dev.github.io/narwhals/backcompat.
    if dtype == nw.Datetime or dtype == nw.Date:  # noqa: PLR1714
        return "temporal"
    msg = f"Unexpected DtypeKind: {dtype}"
    raise ValueError(msg)
def use_signature(tp: Callable[P, Any], /):
    """
    Use the signature and doc of ``tp`` for the decorated callable ``cb``.

    - **Overload 1**: Decorating method
    - **Overload 2**: Decorating function

    Returns
    -------
    **Adding the annotation breaks typing**:

        Overload[Callable[[WrapsMethod[T, R]], WrappedMethod[T, P, R]], Callable[[WrapsFunc[R]], WrappedFunc[P, R]]]
    """

    @overload
    def decorate(cb: WrapsMethod[T, R], /) -> WrappedMethod[T, P, R]: ...  # pyright: ignore[reportOverlappingOverload]

    @overload
    def decorate(cb: WrapsFunc[R], /) -> WrappedFunc[P, R]: ...  # pyright: ignore[reportOverlappingOverload]

    def decorate(cb: WrapsFunc[R], /) -> WrappedMethod[T, P, R] | WrappedFunc[P, R]:
        """
        Raises when no doc was found.

        Notes
        -----
        - Reference to ``tp`` is stored in ``cb.__wrapped__``.
        - The doc for ``cb`` will have a ``.rst`` link added, referring to ``tp``.
        """
        # `__wrapped__` is assigned before the doc check, so it is set even
        # when the check below raises.
        cb.__wrapped__ = getattr(tp, "__init__", tp)  # type: ignore[attr-defined]
        doc_in = tp.__doc__
        if not doc_in:
            msg = f"Found no doc for {tp!r}"
            raise AttributeError(msg)
        # First line: the doc of `cb` if it has one, else a reference to `tp`.
        # The remaining lines are the doc of `tp` without its own first line.
        line_1 = f"{cb.__doc__ or f'Refer to :class:`{tp.__name__}`'}\n"
        inherited_lines = doc_in.splitlines(keepends=True)[1:]
        cb.__doc__ = "".join((line_1, *inherited_lines))
        return cb

    return decorate
@overload
def update_nested(
    original: t.MutableMapping[Any, Any],
    update: t.Mapping[Any, Any],
    copy: Literal[False] = ...,
) -> t.MutableMapping[Any, Any]: ...
@overload
def update_nested(
    original: t.Mapping[Any, Any],
    update: t.Mapping[Any, Any],
    copy: Literal[True],
) -> t.MutableMapping[Any, Any]: ...
def update_nested(
    original: Any,
    update: t.Mapping[Any, Any],
    copy: bool = False,
) -> t.MutableMapping[Any, Any]:
    """
    Recursively merge ``update`` into ``original``.

    Parameters
    ----------
    original : MutableMapping
        the original (nested) dictionary, which will be updated in-place
    update : Mapping
        the nested dictionary of updates
    copy : bool, default False
        if True, then copy the original dictionary rather than modifying it

    Returns
    -------
    original : MutableMapping
        a reference to the (modified) original dict, or to the modified
        deep copy when ``copy`` is True

    Examples
    --------
    >>> original = {"x": {"b": 2, "c": 4}}
    >>> update = {"x": {"b": 5, "d": 6}, "y": 40}
    >>> update_nested(original, update)  # doctest: +SKIP
    {'x': {'b': 5, 'c': 4, 'd': 6}, 'y': 40}
    >>> original  # doctest: +SKIP
    {'x': {'b': 5, 'c': 4, 'd': 6}, 'y': 40}
    """
    target = deepcopy(original) if copy else original
    for key, new_val in update.items():
        if isinstance(new_val, Mapping):
            existing = target.get(key, {})
            # Only merge into an existing mutable mapping (or a fresh dict
            # when the key is absent); anything else is replaced outright.
            if isinstance(existing, MutableMapping):
                new_val = update_nested(existing, new_val)
        target[key] = new_val
    return target
def display_traceback(in_ipython: bool = True):
    """
    Show the traceback of the exception currently being handled.

    Parameters
    ----------
    in_ipython : bool
        If True (default), ask IPython for its active shell and let that
        shell show the traceback. If False, or if no shell is active, the
        traceback is printed with ``traceback.print_exception``.
    """
    exc_info = sys.exc_info()
    shell = None
    if in_ipython:
        # Imported only on request, so IPython is not needed otherwise.
        from IPython.core.getipython import get_ipython

        shell = get_ipython()

    if shell is None:
        traceback.print_exception(*exc_info)
    else:
        shell.showtraceback(exc_info)
_ChannelType = Literal["field", "datum", "value"]
_CHANNEL_CACHE: _ChannelCache
"""Singleton `_ChannelCache` instance.
Initialized on first use.
"""
class _ChannelCache:
    """
    Lookup tables between channel classes and encoding names.

    Instances are obtained through `from_cache`, which builds the tables once
    and stores the instance in the module-level ``_CHANNEL_CACHE``.
    """

    # channel class -> encoding name
    channel_to_name: dict[type[SchemaBase], str]
    # encoding name -> {"field" / "value": channel class}
    name_to_channel: dict[str, dict[_ChannelType, type[SchemaBase]]]

    @classmethod
    def from_cache(cls) -> _ChannelCache:
        """Return the shared instance, creating it on first use."""
        global _CHANNEL_CACHE
        try:
            cached = _CHANNEL_CACHE
        except NameError:
            # First use: the global has only been annotated, never assigned.
            cached = cls.__new__(cls)
            cached.channel_to_name = _init_channel_to_name()  # pyright: ignore[reportAttributeAccessIssue]
            cached.name_to_channel = _invert_group_channels(cached.channel_to_name)
            _CHANNEL_CACHE = cached
        return _CHANNEL_CACHE

    def get_encoding(self, tp: type[Any], /) -> str:
        """
        Return the encoding name registered for the channel class ``tp``.

        Raises
        ------
        NotImplementedError
            If ``tp`` is not a known channel class. The message names ``tp``.
        """
        if encoding := self.channel_to_name.get(tp):
            return encoding
        # `tp` is itself a class, so report its own name. `type(tp).__name__`
        # would always be 'type'. Fall back to the class of `tp` only if a
        # non-class object was passed in.
        type_name = tp.__name__ if isinstance(tp, type) else type(tp).__name__
        msg = f"positional of type {type_name!r}"
        raise NotImplementedError(msg)

    def _wrap_in_channel(self, obj: Any, encoding: str, /):
        """
        Convert ``obj`` into the channel class registered for ``encoding``.

        `SchemaBase` instances are returned unchanged, lists and tuples are
        converted element by element, and strings are treated as shorthand.
        If the encoding is unknown a warning is emitted and ``obj`` is
        returned (after the str/SchemaLike conversion, if one applied).
        """
        if isinstance(obj, SchemaBase):
            return obj
        elif isinstance(obj, str):
            obj = {"shorthand": obj}
        elif isinstance(obj, (list, tuple)):
            return [self._wrap_in_channel(el, encoding) for el in obj]
        elif isinstance(obj, SchemaLike):
            obj = obj.to_dict()
        if channel := self.name_to_channel.get(encoding):
            # A "value" key selects the value channel class, otherwise the
            # field channel class is used.
            tp = channel["value" if "value" in obj else "field"]
            try:
                # Don't force validation here; some objects won't be valid until
                # they're created in the context of a chart.
                return tp.from_dict(obj, validate=False)
            except jsonschema.ValidationError:
                # our attempts at finding the correct class have failed
                return obj
        else:
            warnings.warn(f"Unrecognized encoding channel {encoding!r}", stacklevel=1)
            return obj

    def infer_encoding_types(self, kwargs: dict[str, Any], /):
        """Wrap every value of ``kwargs`` that is not `Undefined` in its channel class."""
        return {
            encoding: self._wrap_in_channel(obj, encoding)
            for encoding, obj in kwargs.items()
            if obj is not Undefined
        }
def _init_channel_to_name():
    """
    Build the mapping from channel class to encoding name.

    Note
    ----
    The return type is not expressible using annotations, but is used
    internally by `mypy`/`pyright` and avoids the need for type ignores.

    Returns
    -------
    mapping: dict[type[`<subclass of FieldChannelMixin and SchemaBase>`] | type[`<subclass of ValueChannelMixin and SchemaBase>`] | type[`<subclass of DatumChannelMixin and SchemaBase>`], str]
    """
    from altair.vegalite.v5.schema import channels as ch

    mixins = ch.FieldChannelMixin, ch.ValueChannelMixin, ch.DatumChannelMixin
    mapping = {}
    for candidate in ch.__dict__.values():
        # Module attributes that are not classes are skipped.
        if not isinstance(candidate, type):
            continue
        if issubclass(candidate, mixins) and issubclass(candidate, SchemaBase):
            mapping[candidate] = candidate._encoding_name
    return mapping
def _invert_group_channels(
    m: dict[type[SchemaBase], str], /
) -> dict[str, dict[_ChannelType, type[SchemaBase]]]:
    """
    Grouped inverted index for `_ChannelCache.channel_to_name`.

    Every encoding name in ``m`` becomes a key. Its value is a dict with up
    to two entries: ``"value"`` for a class whose name ends in ``Value`` and
    ``"field"`` for any other class. Classes whose name ends in ``Datum``
    are left out, as `datum` is never utilized in `wrap_in_channel`.

    Entries are accumulated per encoding name, so the result does not depend
    on classes of the same encoding being adjacent in ``m``.
    """
    index: dict[str, dict[Any, type[SchemaBase]]] = {}
    for tp, encoding in m.items():
        # Register the encoding first, so it gets an entry (possibly empty)
        # even when all of its classes are `Datum` classes.
        group = index.setdefault(encoding, {})
        name = tp.__name__
        if name.endswith("Datum"):
            continue
        sub_key = "value" if name.endswith("Value") else "field"
        group[sub_key] = tp
    return index
def infer_encoding_types(args: tuple[Any, ...], kwargs: dict[str, Any]):
    """
    Infer typed keyword arguments for args and kwargs.

    Parameters
    ----------
    args : Sequence
        Sequence of function args
    kwargs : MutableMapping
        Dict of function kwargs. Encodings derived from ``args`` are added to
        this dict.

    Returns
    -------
    kwargs : dict
        All args and kwargs in a single dict, with keys and types
        based on the channels mapping.

    Raises
    ------
    ValueError
        If an encoding derived from ``args`` is already present in ``kwargs``.
    """
    cache = _ChannelCache.from_cache()
    # Positional arguments become keyword arguments, keyed by the encoding
    # registered for their type.
    for arg in args:
        # For a list or tuple, the first element (or None if empty) decides.
        if isinstance(arg, (list, tuple)):
            el = next(iter(arg), None)
        else:
            el = arg
        encoding = cache.get_encoding(type(el))
        if encoding in kwargs:
            msg = f"encoding {encoding!r} specified twice."
            raise ValueError(msg)
        kwargs[encoding] = arg
    return cache.infer_encoding_types(kwargs)

View File

@@ -0,0 +1,442 @@
from __future__ import annotations
import hashlib
import json
import random
import sys
from collections.abc import MutableMapping, Sequence
from functools import partial
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Callable,
Literal,
TypedDict,
TypeVar,
Union,
overload,
)
import narwhals.stable.v1 as nw
from narwhals.stable.v1.dependencies import is_pandas_dataframe
from narwhals.stable.v1.typing import IntoDataFrame
from ._importers import import_pyarrow_interchange
from .core import (
DataFrameLike,
sanitize_geo_interface,
sanitize_narwhals_dataframe,
sanitize_pandas_dataframe,
to_eager_narwhals_dataframe,
)
from .plugin_registry import PluginRegistry
if sys.version_info >= (3, 13):
from typing import Protocol, runtime_checkable
else:
from typing_extensions import Protocol, runtime_checkable
if sys.version_info >= (3, 10):
from typing import Concatenate, ParamSpec
else:
from typing_extensions import Concatenate, ParamSpec
if TYPE_CHECKING:
if sys.version_info >= (3, 13):
from typing import TypeIs
else:
from typing_extensions import TypeIs
if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
import pandas as pd
import pyarrow as pa
@runtime_checkable
class SupportsGeoInterface(Protocol):
    """
    Structural type for objects that carry a ``__geo_interface__`` attribute.

    The class is runtime-checkable, so ``isinstance(obj, SupportsGeoInterface)``
    can be used to test for the presence of that attribute.
    """

    __geo_interface__: MutableMapping
# Any input accepted as data: a plain dict, a dataframe, an object with a
# `__geo_interface__`, or an object with a `__dataframe__` method.
DataType: TypeAlias = Union[
    dict[Any, Any], IntoDataFrame, SupportsGeoInterface, DataFrameLike
]

# Type variables bound to the aliases above.
TDataType = TypeVar("TDataType", bound=DataType)
TIntoDataFrame = TypeVar("TIntoDataFrame", bound=IntoDataFrame)

# dict with string keys whose values are a string, a dict, or a list of dicts.
VegaLiteDataDict: TypeAlias = dict[
    str, Union[str, dict[Any, Any], list[dict[Any, Any]]]
]
# dict with string keys whose values are a dict or a list of dicts.
ToValuesReturnType: TypeAlias = dict[str, Union[dict[Any, Any], list[dict[Any, Any]]]]
# A dataframe, a dict of sequences, or None.
SampleReturnType = Union[IntoDataFrame, dict[str, Sequence], None]
def is_data_type(obj: Any) -> TypeIs[DataType]:
    """
    Report whether ``obj`` is one of the supported data inputs.

    A ``dict`` or an object exposing ``__geo_interface__`` is accepted directly;
    anything else is accepted only if narwhals can wrap it as a ``DataFrame``.
    """
    if isinstance(obj, (dict, SupportsGeoInterface)):
        return True
    # ``pass_through=True`` hands back ``obj`` unchanged when narwhals does not
    # recognise it, so the isinstance check below is then simply False.
    wrapped = nw.from_native(obj, eager_or_interchange_only=True, pass_through=True)
    return isinstance(wrapped, nw.DataFrame)
# ==============================================================================
# Data transformer registry
#
# A data transformer is a callable that takes a supported data type and returns
# a transformed dictionary version of it which is compatible with the VegaLite schema.
# The dict objects will be the Data portion of the VegaLite schema.
#
# Renderers only deal with the dict form of a
# VegaLite spec, after the Data model has been put into a schema compliant
# form.
# ==============================================================================
# Captures the extra parameters a data transformer accepts after the data argument.
P = ParamSpec("P")
# NOTE: `Any` required due to the complexity of existing signatures imported in `altair.vegalite.v5.data.py`
R = TypeVar("R", VegaLiteDataDict, Any)
# A data transformer: a callable whose first positional argument is the data,
# followed by the parameters captured by ``P``, returning ``R``.
DataTransformerType = Callable[Concatenate[DataType, P], R]
class DataTransformerRegistry(PluginRegistry[DataTransformerType, R]):
    """Plugin registry for data transformers with a ``consolidate_datasets`` switch."""

    # Class-level dictionary: the setting is shared by every instance.
    _global_settings = {"consolidate_datasets": True}

    @property
    def consolidate_datasets(self) -> bool:
        """Value stored under ``"consolidate_datasets"`` in the global settings."""
        settings = self._global_settings
        return settings["consolidate_datasets"]

    @consolidate_datasets.setter
    def consolidate_datasets(self, value: bool) -> None:
        settings = self._global_settings
        settings["consolidate_datasets"] = value
# ==============================================================================
class MaxRowsError(Exception):
    """Error signalling that a data model exceeds the permitted number of rows."""
@overload
def limit_rows(data: None = ..., max_rows: int | None = ...) -> partial: ...
@overload
def limit_rows(data: DataType, max_rows: int | None = ...) -> DataType: ...
def limit_rows(
    data: DataType | None = None, max_rows: int | None = 5000
) -> partial | DataType:
    """
    Guard against oversized datasets.

    Parameters
    ----------
    data
        The data to check. When omitted, a ``functools.partial`` of this
        function bound to ``max_rows`` is returned instead.
    max_rows
        Maximum number of rows allowed; ``None`` disables the check.

    Returns
    -------
    The data (converted to an eager narwhals dataframe when it is neither a
    dict nor a ``__geo_interface__`` object), or a partial when ``data`` is None.

    Raises
    ------
    MaxRowsError
        If the data has more than ``max_rows`` rows.
    """
    if data is None:
        return partial(limit_rows, max_rows=max_rows)
    check_data_type(data)

    if isinstance(data, SupportsGeoInterface):
        geo = data.__geo_interface__
        # Only a FeatureCollection is counted by feature; any other geometry
        # is measured by the length of the interface mapping itself.
        values = geo["features"] if geo["type"] == "FeatureCollection" else geo
    elif isinstance(data, dict):
        if "values" not in data:
            # Nothing countable (e.g. url-based data): pass it through untouched.
            return data
        values = data["values"]
    else:
        data = to_eager_narwhals_dataframe(data)
        values = data

    if max_rows is None or len(values) <= max_rows:
        return data

    msg = (
        "The number of rows in your dataset is greater "
        f"than the maximum allowed ({max_rows}).\n\n"
        "Try enabling the VegaFusion data transformer which "
        "raises this limit by pre-evaluating data\n"
        "transformations in Python.\n"
        "    >> import altair as alt\n"
        '    >> alt.data_transformers.enable("vegafusion")\n\n'
        "Or, see https://altair-viz.github.io/user_guide/large_datasets.html "
        "for additional information\n"
        "on how to plot large datasets."
    )
    raise MaxRowsError(msg)
@overload
def sample(
    data: None = ..., n: int | None = ..., frac: float | None = ...
) -> partial: ...
@overload
def sample(
    data: TIntoDataFrame, n: int | None = ..., frac: float | None = ...
) -> TIntoDataFrame: ...
@overload
def sample(
    data: DataType, n: int | None = ..., frac: float | None = ...
) -> SampleReturnType: ...
def sample(
    data: DataType | None = None,
    n: int | None = None,
    frac: float | None = None,
) -> partial | SampleReturnType:
    """
    Shrink the data model by drawing rows at random, without replacement.

    Parameters
    ----------
    data
        Data to sample from. When omitted, a ``functools.partial`` bound to
        ``n`` and ``frac`` is returned.
    n
        Number of rows to draw. When falsy, the count is derived from ``frac``.
    frac
        Fraction of the rows to draw; used only when ``n`` is falsy.

    Raises
    ------
    ValueError
        If neither ``n`` nor ``frac`` is usable for dict or non-pandas input.
    """
    if data is None:
        return partial(sample, n=n, frac=frac)
    check_data_type(data)

    if is_pandas_dataframe(data):
        # pandas implements its own n/frac handling.
        return data.sample(n=n, frac=frac)

    if isinstance(data, dict):
        if "values" not in data:
            # Maybe this should raise an error or return something useful?
            return None
        rows = data["values"]
        if not n:
            if frac is None:
                msg = "frac cannot be None if n is None and data is a dictionary"
                raise ValueError(msg)
            n = int(frac * len(rows))
        return {"values": random.sample(rows, n)}

    frame = nw.from_native(data, eager_only=True)
    if not n:
        if frac is None:
            msg = "frac cannot be None if n is None with this data input type"
            raise ValueError(msg)
        n = int(frac * len(frame))
    chosen = random.sample(range(len(frame)), n)
    return frame[chosen].to_native()
# File formats that the text writers in this module can produce.
_FormatType = Literal["csv", "json"]


class _FormatDict(TypedDict):
    """Mapping with a single ``type`` key naming the file format."""

    type: _FormatType


class _ToFormatReturnUrlDict(TypedDict):
    """Url-based data model: where the file lives and which format it is in."""

    url: str
    format: _FormatDict
@overload
def to_json(
    data: None = ...,
    prefix: str = ...,
    extension: str = ...,
    filename: str = ...,
    urlpath: str = ...,
) -> partial: ...
@overload
def to_json(
    data: DataType,
    prefix: str = ...,
    extension: str = ...,
    filename: str = ...,
    urlpath: str = ...,
) -> _ToFormatReturnUrlDict: ...
def to_json(
    data: DataType | None = None,
    prefix: str = "altair-data",
    extension: str = "json",
    filename: str = "{prefix}-{hash}.{extension}",
    urlpath: str = "",
) -> partial | _ToFormatReturnUrlDict:
    """
    Serialise the data model to a ``.json`` file and return a url-based data model.

    When ``data`` is omitted, a ``functools.partial`` of this function bound to
    the remaining options is returned instead.
    """
    options = _to_text_kwds(prefix, extension, filename, urlpath)
    if data is None:
        return partial(to_json, **options)
    payload = _data_to_json_string(data)
    return _to_text(payload, **options, format=_FormatDict(type="json"))
@overload
def to_csv(
    data: None = ...,
    prefix: str = ...,
    extension: str = ...,
    filename: str = ...,
    urlpath: str = ...,
) -> partial: ...
@overload
def to_csv(
    data: dict | pd.DataFrame | DataFrameLike,
    prefix: str = ...,
    extension: str = ...,
    filename: str = ...,
    urlpath: str = ...,
) -> _ToFormatReturnUrlDict: ...
def to_csv(
    data: dict | pd.DataFrame | DataFrameLike | None = None,
    prefix: str = "altair-data",
    extension: str = "csv",
    filename: str = "{prefix}-{hash}.{extension}",
    urlpath: str = "",
) -> partial | _ToFormatReturnUrlDict:
    """
    Serialise the data model to a ``.csv`` file and return a url-based data model.

    When ``data`` is omitted, a ``functools.partial`` of this function bound to
    the remaining options is returned instead.
    """
    options = _to_text_kwds(prefix, extension, filename, urlpath)
    if data is None:
        return partial(to_csv, **options)
    payload = _data_to_csv_string(data)
    return _to_text(payload, **options, format=_FormatDict(type="csv"))
def _to_text(
    data: str,
    prefix: str,
    extension: str,
    filename: str,
    urlpath: str,
    format: _FormatDict,
) -> _ToFormatReturnUrlDict:
    """
    Write ``data`` to a file named from its hash and describe where it lives.

    ``filename`` is a template filled with ``prefix``, the data hash and
    ``extension``; the returned url is ``urlpath`` joined with that file name.
    """
    digest = _compute_data_hash(data)
    target = filename.format(prefix=prefix, hash=digest, extension=extension)
    Path(target).write_text(data, encoding="utf-8")
    location = str(Path(urlpath, target))
    return _ToFormatReturnUrlDict({"url": location, "format": format})
def _to_text_kwds(
    prefix: str, extension: str, filename: str, urlpath: str, /
) -> dict[str, str]:
    """Bundle the four positional text-output options into a keyword dictionary."""
    return dict(
        prefix=prefix,
        extension=extension,
        filename=filename,
        urlpath=urlpath,
    )
def to_values(data: DataType) -> ToValuesReturnType:
    """
    Convert supported data into a data model holding inline ``values``.

    Raises
    ------
    KeyError
        If a dict is given that has no ``"values"`` key.
    ValueError
        If the data matches none of the handled types.
    """
    check_data_type(data)
    # `pass_through=True` passes `data` through as-is if it is not a Narwhals object.
    native = nw.to_native(data, pass_through=True)

    if isinstance(native, SupportsGeoInterface):
        return {"values": _from_geo_interface(native)}

    if is_pandas_dataframe(native):
        records = sanitize_pandas_dataframe(native).to_dict(orient="records")
        return {"values": records}

    if isinstance(native, dict):
        if "values" in native:
            return native
        msg = "values expected in data dict, but not present."
        raise KeyError(msg)

    if isinstance(data, nw.DataFrame):
        sanitized = sanitize_narwhals_dataframe(data)
        return {"values": sanitized.rows(named=True)}

    # Should never reach this state as tested by check_data_type
    msg = f"Unrecognized data type: {type(data)}"
    raise ValueError(msg)
def check_data_type(data: DataType) -> None:
    """Raise ``TypeError`` unless ``is_data_type(data)`` accepts the input."""
    if is_data_type(data):
        return
    msg = f"Expected dict, DataFrame or a __geo_interface__ attribute, got: {type(data)}"
    raise TypeError(msg)
# ==============================================================================
# Private utilities
# ==============================================================================
def _compute_data_hash(data_str: str) -> str:
    """Return the first 32 hex characters of the SHA-256 digest of ``data_str``."""
    digest = hashlib.sha256(data_str.encode())
    return digest.hexdigest()[:32]
def _from_geo_interface(data: SupportsGeoInterface | Any) -> dict[str, Any]:
    """
    Sanitize a ``__geo_interface__``, with a pre-sanitize step for ``pandas`` if needed.

    Notes
    -----
    Kept as a separate function to work around typing issues related to:
    - Intersection types
    - ``typing.TypeGuard``
    - ``pd.DataFrame.__getattr__``
    """
    source = sanitize_pandas_dataframe(data) if is_pandas_dataframe(data) else data
    return sanitize_geo_interface(source.__geo_interface__)
def _data_to_json_string(data: DataType) -> str:
    """
    Serialise the input data to a JSON string.

    Raises
    ------
    KeyError
        If a dict is given that has no ``"values"`` key.
    NotImplementedError
        If narwhals cannot interpret the data as an eager dataframe.
    """
    check_data_type(data)

    if isinstance(data, SupportsGeoInterface):
        return json.dumps(_from_geo_interface(data))

    if is_pandas_dataframe(data):
        sanitized = sanitize_pandas_dataframe(data)
        return sanitized.to_json(orient="records", double_precision=15)

    if isinstance(data, dict):
        if "values" in data:
            return json.dumps(data["values"], sort_keys=True)
        msg = "values expected in data dict, but not present."
        raise KeyError(msg)

    try:
        frame = nw.from_native(data, eager_only=True)
    except TypeError as exc:
        msg = "to_json only works with data expressed as a DataFrame or as a dict"
        raise NotImplementedError(msg) from exc
    records = sanitize_narwhals_dataframe(frame).rows(named=True)
    return json.dumps(records)
def _data_to_csv_string(data: DataType) -> str:
    """
    Return a CSV string representation of the input data.

    Raises
    ------
    NotImplementedError
        If the data exposes ``__geo_interface__`` (not supported for CSV), or
        if narwhals cannot interpret it as an eager dataframe.
    KeyError
        If a dict is given that has no ``"values"`` key.
    ImportError
        If a dict is given but pandas is not installed.
    """
    check_data_type(data)
    if isinstance(data, SupportsGeoInterface):
        # Report the type of the offending object; formatting
        # ``type(SupportsGeoInterface)`` would print the protocol's metaclass.
        msg = (
            f"to_csv does not yet work with data that "
            f"is of type {type(data).__name__!r}.\n"
            f"See https://github.com/vega/altair/issues/3441"
        )
        raise NotImplementedError(msg)
    elif is_pandas_dataframe(data):
        data = sanitize_pandas_dataframe(data)
        return data.to_csv(index=False)
    elif isinstance(data, dict):
        if "values" not in data:
            msg = "values expected in data dict, but not present"
            raise KeyError(msg)
        # pandas is only needed for this branch, so import it lazily.
        try:
            import pandas as pd
        except ImportError as exc:
            msg = "pandas is required to convert a dict to a CSV string"
            raise ImportError(msg) from exc
        return pd.DataFrame.from_dict(data["values"]).to_csv(index=False)
    try:
        data_nw = nw.from_native(data, eager_only=True)
    except TypeError as exc:
        msg = "to_csv only works with data expressed as a DataFrame or as a dict"
        raise NotImplementedError(msg) from exc
    return data_nw.write_csv()
def arrow_table_from_dfi_dataframe(dfi_df: DataFrameLike) -> pa.Table:
    """
    Turn a DataFrame Interchange Protocol compatible object into an Arrow Table.

    The object's own conversion methods are tried first; the pyarrow
    interchange ``from_dataframe`` function is the fallback.
    """
    import pyarrow as pa

    # Prefer a conversion method on the object itself: it has more control over
    # the conversion and may have broader compatibility. Polars, for example,
    # supports Date32 columns in direct conversion while pyarrow does not yet
    # support this type in from_dataframe.
    candidates = ("arrow", "to_arrow", "to_arrow_table", "to_pyarrow")
    for name in candidates:
        method = getattr(dfi_df, name, None)
        if not callable(method):
            continue
        table = method()
        if isinstance(table, pa.Table):
            return table

    interchange = import_pyarrow_interchange()
    return interchange.from_dataframe(dfi_df)

View File

@@ -0,0 +1,196 @@
from __future__ import annotations
import sys
import threading
import warnings
from typing import TYPE_CHECKING, Literal
if sys.version_info >= (3, 13):
from warnings import deprecated as _deprecated
else:
from typing_extensions import deprecated as _deprecated
if TYPE_CHECKING:
if sys.version_info >= (3, 11):
from typing import LiteralString
else:
from typing_extensions import LiteralString
# Names exported by a star-import of this module.
__all__ = [
"AltairDeprecationWarning",
"deprecated",
"deprecated_static_only",
"deprecated_warn",
]
class AltairDeprecationWarning(DeprecationWarning):
    """Default warning category used by the deprecation helpers in this module."""
def _format_message(
    version: LiteralString,
    alternative: LiteralString | None,
    message: LiteralString | None,
    /,
) -> LiteralString:
    """
    Build the deprecation text.

    The text always states the version; a "Use ... instead." hint is added when
    ``alternative`` is truthy, and ``message`` is appended on its own line when
    it is truthy.
    """
    text = f"\nDeprecated since `altair={version}`."
    if alternative:
        text = f"{text} Use {alternative} instead."
    if message:
        return f"{text}\n{message}"
    return text
# NOTE: Annotating the return type breaks `pyright` detecting [reportDeprecated]
# NOTE: `LiteralString` requirement is introduced by stubs
def deprecated(
    *,
    version: LiteralString,
    alternative: LiteralString | None = None,
    message: LiteralString | None = None,
    category: type[AltairDeprecationWarning] | None = AltairDeprecationWarning,
    stacklevel: int = 1,
):  # te.deprecated
    """
    Mark a class, function or overload as deprecated.

    Applying this decorator makes type checkers report a diagnostic wherever
    the deprecated object is used.

    Parameters
    ----------
    version
        ``altair`` version in which the deprecation first appeared.
    alternative
        Replacement class/method/function to suggest.
    message
        Extra text appended after ``version`` and ``alternative``.
    category
        Warning category emitted at runtime. With ``None``, no runtime
        warning is emitted.
    stacklevel
        Where the warning is attributed. ``1`` (the default) points at the
        direct caller of the deprecated object; larger values point further
        up the stack.

    The *category* and *stacklevel* arguments have no effect on static type
    checker behavior.

    References
    ----------
    [PEP 702](https://peps.python.org/pep-0702/)
    """
    return _deprecated(
        _format_message(version, alternative, message),
        category=category,
        stacklevel=stacklevel,
    )
def deprecated_warn(
    message: LiteralString,
    *,
    version: LiteralString,
    alternative: LiteralString | None = None,
    category: type[AltairDeprecationWarning] = AltairDeprecationWarning,
    stacklevel: int = 2,
    action: Literal["once"] | None = None,
) -> None:
    """
    Emit a runtime warning that the current code path is deprecated.

    Reserve this for non-trivial cases *only*. ``@deprecated`` should always
    be preferred because static type checkers recognize it.

    Parameters
    ----------
    message
        Explanation of the deprecated behaviour.

        .. note::
            Unlike ``@deprecated``, this is *not* optional.
    version
        ``altair`` version in which the deprecation first appeared.
    alternative
        Replacement argument/method/function to suggest.
    category
        The runtime warning type emitted.
    stacklevel
        How far up the call stack the warning should appear.
        A value of ``2`` attributes the warning to the caller
        of the code calling ``deprecated_warn()``.
    action
        ``None`` warns on every call; ``"once"`` warns only the first time a
        given formatted message is seen. Any other value raises
        ``NotImplementedError``.

    References
    ----------
    [warnings.warn](https://docs.python.org/3/library/warnings.html#warnings.warn)
    """
    text = _format_message(version, alternative, message)
    if action == "once":
        _warn_once(text, category=category, stacklevel=stacklevel)
    elif action is None:
        warnings.warn(text, category=category, stacklevel=stacklevel)
    else:
        raise NotImplementedError(action)
# Direct alias of the imported ``_deprecated`` decorator: no message formatting
# and no default category are applied. The bare string that follows documents
# how the alias is meant to be used.
deprecated_static_only = _deprecated
"""
Using this decorator **exactly as described**, ensures ``message`` is displayed to a static type checker.
**BE CAREFUL USING THIS**.
See screenshots in `comment`_ for motivation.
Every use should look like::
@deprecated_static_only(
"Deprecated since `altair=5.5.0`. Use altair.other instead.",
category=None,
)
def old_function(*args): ...
If a runtime warning is desired, use `@alt.utils.deprecated` instead.
Parameters
----------
message : LiteralString
- **Not** a variable
- **Not** use placeholders
- **Not** use concatenation
- **Do not use anything that could be considered dynamic**
category : None
You **need** to explicitly pass ``None``
.. _comment:
https://github.com/vega/altair/pull/3618#issuecomment-2423991968
---
"""
class _WarningsMonitor:
    """Lock-protected record of the messages that have already been warned about."""

    def __init__(self) -> None:
        self._warned: dict[LiteralString, Literal[True]] = {}
        self._lock = threading.Lock()

    def __contains__(self, key: LiteralString, /) -> bool:
        with self._lock:
            return key in self._warned

    def hit(self, key: LiteralString, /) -> None:
        """Record ``key`` as warned."""
        with self._lock:
            self._warned[key] = True

    def first_hit(self, key: LiteralString, /) -> bool:
        """
        Record ``key`` and report whether this call was the first to do so.

        The check and the insertion happen under a single lock acquisition, so
        exactly one caller gets ``True`` for a given key.
        """
        with self._lock:
            if key in self._warned:
                return False
            self._warned[key] = True
            return True

    def clear(self) -> None:
        """Forget every recorded message."""
        with self._lock:
            self._warned.clear()


_warnings_monitor = _WarningsMonitor()


def _warn_once(
    msg: LiteralString, /, *, category: type[AltairDeprecationWarning], stacklevel: int
) -> None:
    """
    Emit ``msg`` as a warning unless the same message was already emitted.

    ``stacklevel`` is increased by one to account for this helper's own frame.
    """
    # A separate membership test followed by ``hit`` would let two threads both
    # pass the test and both warn; ``first_hit`` closes that gap.
    if _warnings_monitor.first_hit(msg):
        warnings.warn(msg, category=category, stacklevel=stacklevel + 1)

View File

@@ -0,0 +1,232 @@
from __future__ import annotations
import json
import pkgutil
import textwrap
import uuid
from typing import TYPE_CHECKING, Any, Callable, Union
from ._vegafusion_data import compile_with_vegafusion, using_vegafusion
from .mimebundle import spec_to_mimebundle
from .plugin_registry import PluginEnabler, PluginRegistry
from .schemapi import validate_jsonschema
if TYPE_CHECKING:
import sys
if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
# ==============================================================================
# Renderer registry
# ==============================================================================
# MimeBundleType needs to be the same as what are acceptable return values
# for _repr_mimebundle_,
# see https://ipython.readthedocs.io/en/stable/config/integrating.html#MyObject._repr_mimebundle_
# Mapping used for the data part of a MIME bundle.
MimeBundleDataType: TypeAlias = dict[str, Any]
# Mapping used for the metadata part of a MIME bundle.
MimeBundleMetaDataType: TypeAlias = dict[str, Any]
# A bundle is either the data mapping alone or a (data, metadata) pair.
MimeBundleType: TypeAlias = Union[
MimeBundleDataType, tuple[MimeBundleDataType, MimeBundleMetaDataType]
]
# A renderer is any callable that returns a MIME bundle.
RendererType: TypeAlias = Callable[..., MimeBundleType]
# Subtype of MimeBundleType as more specific in the values of the dictionaries
DefaultRendererReturnType: TypeAlias = tuple[
dict[str, Union[str, dict[str, Any]]], dict[str, dict[str, Any]]
]
class RendererRegistry(PluginRegistry[RendererType, MimeBundleType]):
    """Plugin registry for renderers, with a helper for configuring embed options."""

    entrypoint_err_messages = {
        "notebook": textwrap.dedent(
            """
            To use the 'notebook' renderer, you must install the vega package
            and the associated Jupyter extension.
            See https://altair-viz.github.io/getting_started/installation.html
            for more information.
            """
        ),
    }

    def set_embed_options(
        self,
        defaultStyle: bool | str | None = None,
        renderer: str | None = None,
        width: int | None = None,
        height: int | None = None,
        padding: int | None = None,
        scaleFactor: float | None = None,
        actions: bool | dict[str, bool] | None = None,
        format_locale: str | dict | None = None,
        time_format_locale: str | dict | None = None,
        **kwargs,
    ) -> PluginEnabler:
        """
        Configure how Vega & Vega-Lite charts are embedded.

        The full list of options is documented at https://github.com/vega/vega-embed.

        Like the `enable()` method, the result works either as a persistent
        global switch or as a temporary local setting when used as a context
        manager (i.e. in a `with` statement).

        Parameters
        ----------
        defaultStyle : bool or string
            Default stylesheet for embed actions.
        renderer : string
            Renderer used for the view. One of "canvas" (default) or "svg"
        width : integer
            View width in pixels
        height : integer
            View height in pixels
        padding : integer
            View padding in pixels
        scaleFactor : number
            Factor by which the width and height of an exported PNG or SVG
            image are multiplied (default 1).
        actions : bool or dict
            Controls the action links ("Export as PNG/SVG", "View Source",
            "View Vega" (only for Vega-Lite), "Open in Vega Editor") shown with
            the embedded view. True shows all links, false shows none. A
            key-value mapping from the keys (export, source, compiled, editor)
            to booleans selects individual links.
        format_locale : str or dict
            d3-format locale name or dictionary. Defaults to "en-US" for United States English.
            See https://github.com/d3/d3-format/tree/main/locale for available names and example
            definitions.
        time_format_locale : str or dict
            d3-time-format locale name or dictionary. Defaults to "en-US" for United States English.
            See https://github.com/d3/d3-time-format/tree/main/locale for available names and example
            definitions.
        **kwargs :
            Any further options are forwarded to the embed options unchanged.
        """
        named: dict[str, bool | str | float | dict[str, bool] | None] = {
            "defaultStyle": defaultStyle,
            "renderer": renderer,
            "width": width,
            "height": height,
            "padding": padding,
            "scaleFactor": scaleFactor,
            "actions": actions,
            "formatLocale": format_locale,
            "timeFormatLocale": time_format_locale,
        }
        # Only explicitly provided options are forwarded; they take precedence
        # over same-named entries passed through **kwargs.
        for key, val in named.items():
            if val is not None:
                kwargs[key] = val
        return self.enable(None, embed_options=kwargs)
# ==============================================================================
# VegaLite v1/v2 renderer logic
# ==============================================================================
class Displayable:
    """
    A base display class for VegaLite v1/v2.

    This class takes a VegaLite v1/v2 spec and does the following:

    1. Optionally validates the spec against a schema.
    2. Uses the RendererPlugin to grab a renderer and call it when the
       IPython/Jupyter display method (_repr_mimebundle_) is called.

    The spec passed to this class must be fully schema compliant and already
    have the data portion of the spec fully processed and ready to serialize.
    In practice, this means, the data portion of the spec should have been passed
    through appropriate data model transformers.

    Parameters
    ----------
    spec
        The specification to display.
    validate
        When true, the spec is validated against the schema on construction.
    """

    # Registry consulted by ``_repr_mimebundle_``; ``None`` yields an empty bundle.
    renderers: RendererRegistry | None = None
    # ``(package, resource)`` pair passed to ``pkgutil.get_data`` to load the schema.
    schema_path = ("altair", "")

    def __init__(self, spec: dict[str, Any], validate: bool = False) -> None:
        self.spec = spec
        self.validate = validate
        # Honour the flag: validation is optional, so it must not run when the
        # caller did not ask for it.
        if self.validate:
            self._validate()

    def _validate(self) -> None:
        """Validate the spec against the schema."""
        data = pkgutil.get_data(*self.schema_path)
        assert data is not None
        schema_dict: dict[str, Any] = json.loads(data.decode("utf-8"))
        validate_jsonschema(
            self.spec,
            schema_dict,
        )

    def _repr_mimebundle_(
        self, include: Any = None, exclude: Any = None
    ) -> MimeBundleType:
        """Return a MIME bundle for display in Jupyter frontends."""
        if self.renderers is not None:
            renderer_func = self.renderers.get()
            assert renderer_func is not None
            return renderer_func(self.spec)
        else:
            return {}
def default_renderer_base(
    spec: dict[str, Any], mime_type: str, str_repr: str, **options
) -> DefaultRendererReturnType:
    """
    Default renderer for Vega or VegaLite, aimed at modern frontends.

    Modern frontends (JupyterLab, nteract) know how to render the custom
    VegaLite MIME type, so the spec is returned under ``mime_type`` together
    with a ``text/plain`` fallback. Any ``options`` become the metadata for
    that MIME type.
    """
    # Local import to avoid circular ImportError
    from altair.vegalite.v5.display import VEGA_MIME_TYPE, VEGALITE_MIME_TYPE

    assert isinstance(spec, dict)

    if using_vegafusion():
        spec = compile_with_vegafusion(spec)
        # Swap mimetype from Vega-Lite to Vega.
        # If mimetype was JSON, leave it alone
        if mime_type == VEGALITE_MIME_TYPE:
            mime_type = VEGA_MIME_TYPE

    bundle: dict[str, str | dict] = {mime_type: spec, "text/plain": str_repr}
    metadata: dict[str, dict[str, Any]] = {mime_type: options} if options else {}
    return bundle, metadata
def json_renderer_base(
    spec: dict[str, Any], str_repr: str, **options
) -> DefaultRendererReturnType:
    """
    Renderer producing the ``application/json`` MIME type.

    JupyterLab/nteract display this as a nice JSON tree.
    """
    json_mime = "application/json"
    return default_renderer_base(
        spec, mime_type=json_mime, str_repr=str_repr, **options
    )
class HTMLRenderer:
    """Callable that renders charts as HTML, using a fresh output div id per call."""

    def __init__(self, output_div: str = "altair-viz-{}", **kwargs) -> None:
        # ``output_div`` is a format template; the placeholder receives a new id
        # every time the ``output_div`` property is read.
        self._output_div = output_div
        self.kwargs = kwargs

    @property
    def output_div(self) -> str:
        """Div id built from the template and a newly generated UUID hex string."""
        unique = uuid.uuid4().hex
        return self._output_div.format(unique)

    def __call__(self, spec: dict[str, Any], **metadata) -> dict[str, str]:
        """Render ``spec`` to an HTML MIME bundle; ``metadata`` overrides stored kwargs."""
        call_kwargs = dict(self.kwargs, **metadata, output_div=self.output_div)
        return spec_to_mimebundle(spec, format="html", **call_kwargs)

View File

@@ -0,0 +1,98 @@
from __future__ import annotations
import ast
import sys
from typing import TYPE_CHECKING, Any, Callable, Literal, overload
if TYPE_CHECKING:
from os import PathLike
from _typeshed import ReadableBuffer
if sys.version_info >= (3, 11):
from typing import Self
else:
from typing_extensions import Self
class _CatchDisplay:
    """Context manager that temporarily installs itself as ``sys.displayhook``."""

    def __init__(self) -> None:
        # Last value passed to the hook while it was installed.
        self.output: Any | None = None

    def __enter__(self) -> Self:
        self.old_hook: Callable[[object], Any] = sys.displayhook
        sys.displayhook = self
        return self

    def __exit__(self, type, value, traceback) -> Literal[False]:
        sys.displayhook = self.old_hook
        # Returning False will cause exceptions to propagate
        return False

    def __call__(self, output: Any) -> None:
        self.output = output


@overload
def eval_block(
    code: str | Any,
    namespace: dict[str, Any] | None = ...,
    filename: str | ReadableBuffer | PathLike[Any] = ...,
    *,
    strict: Literal[False] = ...,
) -> Any | None: ...
@overload
def eval_block(
    code: str | Any,
    namespace: dict[str, Any] | None = ...,
    filename: str | ReadableBuffer | PathLike[Any] = ...,
    *,
    strict: Literal[True] = ...,
) -> Any: ...
def eval_block(
    code: str | Any,
    namespace: dict[str, Any] | None = None,
    filename: str | ReadableBuffer | PathLike[Any] = "<string>",
    *,
    strict: bool = False,
) -> Any | None:
    """
    Execute a multi-line block of code in the given namespace.

    If the final statement in the code is an expression, return
    the result of the expression.

    Parameters
    ----------
    code
        Source to execute. Code without any statement (empty or
        comment-only) executes nothing and yields ``None``.
    namespace
        Globals used for execution; a fresh dict is used when omitted.
    filename
        Name reported in syntax errors and tracebacks.
    strict
        If true, raise a ``TypeError`` when the return value would be ``None``.
    """
    # NOTE(security): this runs arbitrary Python via ``exec``; never pass
    # untrusted input.
    tree = ast.parse(code, filename=filename, mode="exec")
    if namespace is None:
        namespace = {}
    catch_display = _CatchDisplay()

    # Guard against an empty body, where ``tree.body[-1]`` would raise IndexError.
    if tree.body and isinstance(tree.body[-1], ast.Expr):
        to_exec, to_eval = tree.body[:-1], tree.body[-1:]
    else:
        to_exec, to_eval = tree.body, []

    for node in to_exec:
        compiled = compile(ast.Module([node], []), filename=filename, mode="exec")
        exec(compiled, namespace)

    # "single" mode sends the value of the trailing expression to
    # ``sys.displayhook``, which is where ``_CatchDisplay`` captures it.
    with catch_display:
        for node in to_eval:
            compiled = compile(
                ast.Interactive([node]), filename=filename, mode="single"
            )
            exec(compiled, namespace)

    output = catch_display.output
    if strict and output is None:
        msg = f"Expected a non-None value but got {output!r}"
        raise TypeError(msg)
    return output

View File

@@ -0,0 +1,411 @@
from __future__ import annotations
import json
from typing import Any, Literal
import jinja2
from altair.utils._importers import import_vl_convert, vl_version_for_vl_convert
# Allowed template names — presumably one per HTML template defined below;
# TODO confirm against the code that consumes this alias.
TemplateName = Literal["standard", "universal", "inline", "olli"]
# Allowed render modes; the templates below test for ``mode == 'vega-lite'``.
RenderMode = Literal["vega", "vega-lite"]
# Jinja2 template with two switches:
#   * ``fullhtml`` wraps the output in a complete <html> document.
#   * ``requirejs`` loads vega-embed through require.js instead of <script> tags.
# Variables referenced: output_div, base_url, vega_version, vegalite_version,
# vegaembed_version, mode, spec, embed_options.
HTML_TEMPLATE = jinja2.Template(
"""
{%- if fullhtml -%}
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
{%- endif %}
<style>
#{{ output_div }}.vega-embed {
width: 100%;
display: flex;
}
#{{ output_div }}.vega-embed details,
#{{ output_div }}.vega-embed details summary {
position: relative;
}
</style>
{%- if not requirejs %}
<script type="text/javascript" src="{{ base_url }}/vega@{{ vega_version }}"></script>
{%- if mode == 'vega-lite' %}
<script type="text/javascript" src="{{ base_url }}/vega-lite@{{ vegalite_version }}"></script>
{%- endif %}
<script type="text/javascript" src="{{ base_url }}/vega-embed@{{ vegaembed_version }}"></script>
{%- endif %}
{%- if fullhtml %}
{%- if requirejs %}
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script>
<script>
requirejs.config({
"paths": {
"vega": "{{ base_url }}/vega@{{ vega_version }}?noext",
"vega-lib": "{{ base_url }}/vega-lib?noext",
"vega-lite": "{{ base_url }}/vega-lite@{{ vegalite_version }}?noext",
"vega-embed": "{{ base_url }}/vega-embed@{{ vegaembed_version }}?noext",
}
});
</script>
{%- endif %}
</head>
<body>
{%- endif %}
<div id="{{ output_div }}"></div>
<script>
{%- if requirejs and not fullhtml %}
requirejs.config({
"paths": {
"vega": "{{ base_url }}/vega@{{ vega_version }}?noext",
"vega-lib": "{{ base_url }}/vega-lib?noext",
"vega-lite": "{{ base_url }}/vega-lite@{{ vegalite_version }}?noext",
"vega-embed": "{{ base_url }}/vega-embed@{{ vegaembed_version }}?noext",
}
});
{% endif %}
{% if requirejs -%}
require(['vega-embed'],
{%- else -%}
(
{%- endif -%}
function(vegaEmbed) {
var spec = {{ spec }};
var embedOpt = {{ embed_options }};
function showError(el, error){
el.innerHTML = ('<div style="color:red;">'
+ '<p>JavaScript Error: ' + error.message + '</p>'
+ "<p>This usually means there's a typo in your chart specification. "
+ "See the javascript console for the full traceback.</p>"
+ '</div>');
throw error;
}
const el = document.getElementById('{{ output_div }}');
vegaEmbed("#{{ output_div }}", spec, embedOpt)
.catch(error => showError(el, error));
}){% if not requirejs %}(vegaEmbed){% endif %};
</script>
{%- if fullhtml %}
</body>
</html>
{%- endif %}
"""
)
# Jinja2 template whose script decides at run time how to load the libraries:
# through require.js when an AMD ``define`` is present, otherwise by appending
# <script> tags one after another. Loaded versions are remembered in the
# page-level ``VEGA_DEBUG`` object so a matching version is not loaded twice.
# Variables referenced: output_div, base_url, vega_version, vegalite_version,
# vegaembed_version, spec, embed_options.
HTML_TEMPLATE_UNIVERSAL = jinja2.Template(
"""
<style>
#{{ output_div }}.vega-embed {
width: 100%;
display: flex;
}
#{{ output_div }}.vega-embed details,
#{{ output_div }}.vega-embed details summary {
position: relative;
}
</style>
<div id="{{ output_div }}"></div>
<script type="text/javascript">
var VEGA_DEBUG = (typeof VEGA_DEBUG == "undefined") ? {} : VEGA_DEBUG;
(function(spec, embedOpt){
let outputDiv = document.currentScript.previousElementSibling;
if (outputDiv.id !== "{{ output_div }}") {
outputDiv = document.getElementById("{{ output_div }}");
}
const paths = {
"vega": "{{ base_url }}/vega@{{ vega_version }}?noext",
"vega-lib": "{{ base_url }}/vega-lib?noext",
"vega-lite": "{{ base_url }}/vega-lite@{{ vegalite_version }}?noext",
"vega-embed": "{{ base_url }}/vega-embed@{{ vegaembed_version }}?noext",
};
function maybeLoadScript(lib, version) {
var key = `${lib.replace("-", "")}_version`;
return (VEGA_DEBUG[key] == version) ?
Promise.resolve(paths[lib]) :
new Promise(function(resolve, reject) {
var s = document.createElement('script');
document.getElementsByTagName("head")[0].appendChild(s);
s.async = true;
s.onload = () => {
VEGA_DEBUG[key] = version;
return resolve(paths[lib]);
};
s.onerror = () => reject(`Error loading script: ${paths[lib]}`);
s.src = paths[lib];
});
}
function showError(err) {
outputDiv.innerHTML = `<div class="error" style="color:red;">${err}</div>`;
throw err;
}
function displayChart(vegaEmbed) {
vegaEmbed(outputDiv, spec, embedOpt)
.catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));
}
if(typeof define === "function" && define.amd) {
requirejs.config({paths});
let deps = ["vega-embed"];
require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));
} else {
maybeLoadScript("vega", "{{vega_version}}")
.then(() => maybeLoadScript("vega-lite", "{{vegalite_version}}"))
.then(() => maybeLoadScript("vega-embed", "{{vegaembed_version}}"))
.catch(showError)
.then(() => displayChart(vegaEmbed));
}
})({{ spec }}, {{ embed_options }});
</script>
"""
)
# This is like the HTML_TEMPLATE template, but includes vega javascript inline
# so that the resulting file is not dependent on external resources. This was
# ported over from altair_saver.
#
# Implies requirejs=False and fullhtml=True (the flag names used by HTML_TEMPLATE).
# Always renders a complete HTML document with the JavaScript bundle embedded
# directly in a <script> element.
# Variables referenced: output_div, vegalite_version, vegaembed_script, spec,
# embed_options.
INLINE_HTML_TEMPLATE = jinja2.Template(
"""\
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
#{{ output_div }}.vega-embed {
width: 100%;
display: flex;
}
#{{ output_div }}.vega-embed details,
#{{ output_div }}.vega-embed details summary {
position: relative;
}
</style>
<script type="text/javascript">
// vega-embed.js bundle with Vega-Lite version v{{ vegalite_version }}
{{ vegaembed_script }}
</script>
</head>
<body>
<div class="vega-visualization" id="{{ output_div }}"></div>
<script type="text/javascript">
const spec = {{ spec }};
const embedOpt = {{ embed_options }};
vegaEmbed('#{{ output_div }}', spec, embedOpt).catch(console.error);
</script>
</body>
</html>
"""
)
# HTML snippet template that renders the chart with vega-embed and, in
# addition, loads ``olli`` and ``olli-adapters`` and appends the Olli rendering
# of the spec to the output div, after the element holding the chart.
#
# Besides the variables used for the vega libraries, rendering this template
# needs ``olli_version`` and ``olli_adapters_version``.
HTML_TEMPLATE_OLLI = jinja2.Template(
"""
<style>
#{{ output_div }}.vega-embed {
width: 100%;
display: flex;
}
#{{ output_div }}.vega-embed details,
#{{ output_div }}.vega-embed details summary {
position: relative;
}
</style>
<div id="{{ output_div }}"></div>
<script type="text/javascript">
var VEGA_DEBUG = (typeof VEGA_DEBUG == "undefined") ? {} : VEGA_DEBUG;
(function(spec, embedOpt){
let outputDiv = document.currentScript.previousElementSibling;
if (outputDiv.id !== "{{ output_div }}") {
outputDiv = document.getElementById("{{ output_div }}");
}
const olliDiv = document.createElement("div");
const vegaDiv = document.createElement("div");
outputDiv.appendChild(vegaDiv);
outputDiv.appendChild(olliDiv);
outputDiv = vegaDiv;
const paths = {
"vega": "{{ base_url }}/vega@{{ vega_version }}?noext",
"vega-lib": "{{ base_url }}/vega-lib?noext",
"vega-lite": "{{ base_url }}/vega-lite@{{ vegalite_version }}?noext",
"vega-embed": "{{ base_url }}/vega-embed@{{ vegaembed_version }}?noext",
"olli": "{{ base_url }}/olli@{{ olli_version }}?noext",
"olli-adapters": "{{ base_url }}/olli-adapters@{{ olli_adapters_version }}?noext",
};
function maybeLoadScript(lib, version) {
var key = `${lib.replace("-", "")}_version`;
return (VEGA_DEBUG[key] == version) ?
Promise.resolve(paths[lib]) :
new Promise(function(resolve, reject) {
var s = document.createElement('script');
document.getElementsByTagName("head")[0].appendChild(s);
s.async = true;
s.onload = () => {
VEGA_DEBUG[key] = version;
return resolve(paths[lib]);
};
s.onerror = () => reject(`Error loading script: ${paths[lib]}`);
s.src = paths[lib];
});
}
function showError(err) {
outputDiv.innerHTML = `<div class="error" style="color:red;">${err}</div>`;
throw err;
}
function displayChart(vegaEmbed, olli, olliAdapters) {
vegaEmbed(outputDiv, spec, embedOpt)
.catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));
olliAdapters.VegaLiteAdapter(spec).then(olliVisSpec => {
const olliFunc = typeof olli === 'function' ? olli : olli.olli;
const olliRender = olliFunc(olliVisSpec);
olliDiv.append(olliRender);
});
}
if(typeof define === "function" && define.amd) {
requirejs.config({paths});
let deps = ["vega-embed", "olli", "olli-adapters"];
require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));
} else {
maybeLoadScript("vega", "{{vega_version}}")
.then(() => maybeLoadScript("vega-lite", "{{vegalite_version}}"))
.then(() => maybeLoadScript("vega-embed", "{{vegaembed_version}}"))
.then(() => maybeLoadScript("olli", "{{olli_version}}"))
.then(() => maybeLoadScript("olli-adapters", "{{olli_adapters_version}}"))
.catch(showError)
.then(() => displayChart(vegaEmbed, olli, OlliAdapters));
}
})({{ spec }}, {{ embed_options }});
</script>
"""
)
# Lookup table from the template names accepted by ``spec_to_html`` (its
# ``template`` argument) to the jinja2 template rendered for that name.
TEMPLATES: dict[TemplateName, jinja2.Template] = {
"standard": HTML_TEMPLATE,
"universal": HTML_TEMPLATE_UNIVERSAL,
"inline": INLINE_HTML_TEMPLATE,
"olli": HTML_TEMPLATE_OLLI,
}
def spec_to_html(
    spec: dict[str, Any],
    mode: RenderMode,
    vega_version: str | None,
    vegaembed_version: str | None,
    vegalite_version: str | None = None,
    base_url: str = "https://cdn.jsdelivr.net/npm",
    output_div: str = "vis",
    embed_options: dict[str, Any] | None = None,
    json_kwds: dict[str, Any] | None = None,
    fullhtml: bool = True,
    requirejs: bool = False,
    template: jinja2.Template | TemplateName = "standard",
) -> str:
    """
    Embed a Vega/Vega-Lite spec into an HTML page.

    Parameters
    ----------
    spec : dict
        a dictionary representing a vega-lite plot spec.
    mode : string {'vega' | 'vega-lite'}
        The rendering mode. This value is overridden by embed_options['mode'],
        if it is present.
    vega_version : string
        For html output, the version of vega.js to use.
    vegaembed_version : string
        For html output, the version of vegaembed.js to use.
    vegalite_version : string
        For html output, the version of vegalite.js to use. Required when the
        effective mode is 'vega-lite'.
    base_url : string (optional)
        The base url from which to load the javascript libraries.
    output_div : string (optional)
        The id of the div element where the plot will be shown.
    embed_options : dict (optional)
        Dictionary of options to pass to the vega-embed script. Default
        entry is {'mode': mode}. The dictionary passed in is not modified.
    json_kwds : dict (optional)
        Dictionary of keywords to pass to json.dumps() when serializing
        ``spec``.
    fullhtml : boolean (optional)
        If True (default) then return a full html page. If False, then return
        an HTML snippet that can be embedded into an HTML page.
    requirejs : boolean (optional)
        If False (default) then load libraries from base_url using <script>
        tags. If True, then load libraries using requirejs
    template : jinja2.Template or string (optional)
        Specify the template to use (default = 'standard'). If template is a
        string, it must be one of {'standard', 'universal', 'inline', 'olli'}.
        Otherwise, it can be a jinja2.Template object containing a custom
        template.

    Returns
    -------
    output : string
        an HTML string for rendering the chart.

    Raises
    ------
    ValueError
        If the effective mode is neither 'vega' nor 'vega-lite', if a required
        library version is missing, or if ``template`` does not resolve to an
        object with a ``render`` method.
    """
    # Work on a shallow copy: ``setdefault`` below would otherwise write the
    # "mode" key into a dictionary that belongs to the caller.
    embed_options = dict(embed_options) if embed_options else {}
    json_kwds = json_kwds or {}

    # An explicit embed_options["mode"] wins over the ``mode`` argument.
    mode = embed_options.setdefault("mode", mode)

    if mode not in {"vega", "vega-lite"}:
        msg = "mode must be either 'vega' or 'vega-lite'"
        raise ValueError(msg)

    if vega_version is None:
        msg = "must specify vega_version"
        raise ValueError(msg)

    if vegaembed_version is None:
        msg = "must specify vegaembed_version"
        raise ValueError(msg)

    if mode == "vega-lite" and vegalite_version is None:
        msg = "must specify vega-lite version for mode='vega-lite'"
        raise ValueError(msg)

    # Extra template variables that only some of the named templates consume.
    render_kwargs = {}
    if template == "inline":
        # The inline template embeds the JavaScript bundle produced by
        # vl-convert directly into the page.
        vlc = import_vl_convert()
        vl_version = vl_version_for_vl_convert()
        render_kwargs["vegaembed_script"] = vlc.javascript_bundle(vl_version=vl_version)
    elif template == "olli":
        OLLI_VERSION = "2"
        OLLI_ADAPTERS_VERSION = "2"
        render_kwargs["olli_version"] = OLLI_VERSION
        render_kwargs["olli_adapters_version"] = OLLI_ADAPTERS_VERSION

    # A known name maps to its template; anything else is used as given and
    # must then behave like a template itself.
    jinja_template = TEMPLATES.get(template, template)  # type: ignore[arg-type]
    if not hasattr(jinja_template, "render"):
        msg = f"Invalid template: {jinja_template}"
        raise ValueError(msg)

    return jinja_template.render(
        spec=json.dumps(spec, **json_kwds),
        embed_options=json.dumps(embed_options),
        mode=mode,
        vega_version=vega_version,
        vegalite_version=vegalite_version,
        vegaembed_version=vegaembed_version,
        base_url=base_url,
        output_div=output_div,
        fullhtml=fullhtml,
        requirejs=requirejs,
        **render_kwargs,
    )

View File

@@ -0,0 +1,377 @@
from __future__ import annotations
import struct
from typing import TYPE_CHECKING, Any, Literal, cast, overload
from ._importers import import_vl_convert, vl_version_for_vl_convert
from .html import spec_to_html
if TYPE_CHECKING:
import sys
if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
MimeBundleFormat: TypeAlias = Literal[
"html", "json", "png", "svg", "pdf", "vega", "vega-lite"
]
# The overloads below only differ in ``format`` and describe the shape of the
# returned mimebundle for each group of formats.
@overload
def spec_to_mimebundle(
    spec: dict[str, Any],
    format: Literal["json", "vega-lite"],
    mode: Literal["vega-lite"] | None = ...,
    vega_version: str | None = ...,
    vegaembed_version: str | None = ...,
    vegalite_version: str | None = ...,
    embed_options: dict[str, Any] | None = ...,
    engine: Literal["vl-convert"] | None = ...,
    **kwargs,
) -> dict[str, dict[str, Any]]: ...
@overload
def spec_to_mimebundle(
    spec: dict[str, Any],
    format: Literal["html"],
    mode: Literal["vega-lite"] | None = ...,
    vega_version: str | None = ...,
    vegaembed_version: str | None = ...,
    vegalite_version: str | None = ...,
    embed_options: dict[str, Any] | None = ...,
    engine: Literal["vl-convert"] | None = ...,
    **kwargs,
) -> dict[str, str]: ...
@overload
def spec_to_mimebundle(
    spec: dict[str, Any],
    format: Literal["pdf", "svg", "vega"],
    mode: Literal["vega-lite"] | None = ...,
    vega_version: str | None = ...,
    vegaembed_version: str | None = ...,
    vegalite_version: str | None = ...,
    embed_options: dict[str, Any] | None = ...,
    engine: Literal["vl-convert"] | None = ...,
    **kwargs,
) -> dict[str, Any]: ...
@overload
def spec_to_mimebundle(
    spec: dict[str, Any],
    format: Literal["png"],
    mode: Literal["vega-lite"] | None = ...,
    vega_version: str | None = ...,
    vegaembed_version: str | None = ...,
    vegalite_version: str | None = ...,
    embed_options: dict[str, Any] | None = ...,
    engine: Literal["vl-convert"] | None = ...,
    **kwargs,
) -> tuple[dict[str, Any], dict[str, Any]]: ...
def spec_to_mimebundle(
    spec: dict[str, Any],
    format: MimeBundleFormat,
    mode: Literal["vega-lite"] | None = None,
    vega_version: str | None = None,
    vegaembed_version: str | None = None,
    vegalite_version: str | None = None,
    embed_options: dict[str, Any] | None = None,
    engine: Literal["vl-convert"] | None = None,
    **kwargs,
) -> dict[str, Any] | tuple[dict[str, Any], dict[str, Any]]:
    """
    Convert a vega-lite specification to a mimebundle.

    The mimebundle type is controlled by the ``format`` argument, which can be
    one of the following ['html', 'json', 'png', 'svg', 'pdf', 'vega', 'vega-lite']

    Parameters
    ----------
    spec : dict
        a dictionary representing a vega-lite plot spec
    format : string {'html', 'json', 'png', 'svg', 'pdf', 'vega', 'vega-lite'}
        the file format to be saved.
    mode : string {'vega-lite'}
        The rendering mode. Must be 'vega-lite'; any other value, including
        the default ``None``, is rejected.
    vega_version : string
        The version of vega.js to use
    vegaembed_version : string
        The version of vegaembed.js to use
    vegalite_version : string
        The version of vegalite.js to use. Only required if mode=='vega-lite'
    embed_options : dict (optional)
        The vegaEmbed options dictionary. Defaults to the embed options set with
        alt.renderers.set_embed_options().
        (See https://github.com/vega/vega-embed for details)
    engine: string {'vl-convert'}
        the conversion engine to use for 'png', 'svg', 'pdf', and 'vega' formats
    **kwargs :
        Additional arguments will be passed to the generating function

    Returns
    -------
    output : dict
        a mime-bundle representing the image. For ``format='png'`` a tuple of
        (mimebundle, metadata) is returned instead.

    Raises
    ------
    ValueError
        If ``mode`` is not 'vega-lite', if ``format`` is not supported, or if
        ``format='vega-lite'`` is requested without ``vegalite_version``.

    Note
    ----
    The png, svg, pdf, and vega outputs require the vl-convert package
    """
    # Local import to avoid circular ImportError
    from altair import renderers
    from altair.utils.display import compile_with_vegafusion, using_vegafusion

    if mode != "vega-lite":
        msg = "mode must be 'vega-lite'"
        raise ValueError(msg)

    internal_mode: Literal["vega-lite", "vega"] = mode
    if using_vegafusion():
        # VegaFusion compiles the Vega-Lite spec, so everything downstream has
        # to treat the spec as Vega.
        spec = compile_with_vegafusion(spec)
        internal_mode = "vega"

    # Default to the embed options set by alt.renderers.set_embed_options
    if embed_options is None:
        final_embed_options = renderers.options.get("embed_options", {})
    else:
        final_embed_options = embed_options

    embed_options = preprocess_embed_options(final_embed_options)

    if format in {"png", "svg", "pdf", "vega"}:
        return _spec_to_mimebundle_with_engine(
            spec,
            cast(Literal["png", "svg", "pdf", "vega"], format),
            internal_mode,
            engine=engine,
            format_locale=embed_options.get("formatLocale", None),
            time_format_locale=embed_options.get("timeFormatLocale", None),
            **kwargs,
        )
    elif format == "html":
        html = spec_to_html(
            spec,
            mode=internal_mode,
            vega_version=vega_version,
            vegaembed_version=vegaembed_version,
            vegalite_version=vegalite_version,
            embed_options=embed_options,
            **kwargs,
        )
        return {"text/html": html}
    elif format == "vega-lite":
        if not vegalite_version:
            msg = "Must specify vegalite_version"
            raise ValueError(msg)
        # The MIME type carries the major version only. Take everything before
        # the first dot rather than the first character, so that a
        # multi-digit major version (e.g. "10.1.0") is not truncated.
        major_version = vegalite_version.split(".")[0]
        return {f"application/vnd.vegalite.v{major_version}+json": spec}
    elif format == "json":
        return {"application/json": spec}
    else:
        msg = (
            "format must be one of "
            "['html', 'json', 'png', 'svg', 'pdf', 'vega', 'vega-lite']"
        )
        raise ValueError(msg)
def _spec_to_mimebundle_with_engine(
    spec: dict,
    format: Literal["png", "svg", "pdf", "vega"],
    mode: Literal["vega-lite", "vega"],
    format_locale: str | dict | None = None,
    time_format_locale: str | dict | None = None,
    **kwargs,
) -> Any:
    """
    Build the mimebundle for formats that need a conversion engine.

    Parameters
    ----------
    spec : dict
        a dictionary representing a vega-lite plot spec (or a vega spec when
        ``mode`` is 'vega')
    format : string {'png', 'svg', 'pdf', 'vega'}
        the format of the mimebundle to be returned
    mode : string {'vega-lite', 'vega'}
        The rendering mode.
    format_locale : str or dict
        d3-format locale name or dictionary. Defaults to "en-US" for United States English.
        See https://github.com/d3/d3-format/tree/main/locale for available names and example
        definitions.
    time_format_locale : str or dict
        d3-time-format locale name or dictionary. Defaults to "en-US" for United States English.
        See https://github.com/d3/d3-time-format/tree/main/locale for available names and example
        definitions.
    **kwargs :
        ``engine`` (the conversion engine, {'vl-convert'}) is taken from here;
        ``scale_factor`` and ``ppi`` are read for the formats that use them.

    Returns
    -------
    A mimebundle dict, or for 'png' a ``(mimebundle, metadata)`` tuple where
    the metadata holds the image width and height divided by ``ppi / 72``.
    """
    # The engine arrives through kwargs; validation also fills in the default.
    requested_engine = kwargs.pop("engine", None)
    normalized_engine = _validate_normalize_engine(requested_engine, format)
    if normalized_engine != "vlconvert":
        # This should be validated above
        # but raise exception for the sake of future development
        msg = f"Unexpected normalized_engine {normalized_engine!r}"
        raise ValueError(msg)

    vlc = import_vl_convert()
    vl_version = vl_version_for_vl_convert()
    spec_is_vega = mode == "vega"
    # Locale arguments shared by every rendering call below.
    locales = {
        "format_locale": format_locale,
        "time_format_locale": time_format_locale,
    }

    if format == "vega":
        # A vega spec is returned untouched; vega-lite is compiled first.
        vg = spec if spec_is_vega else vlc.vegalite_to_vega(spec, vl_version=vl_version)
        return {"application/vnd.vega.v5+json": vg}

    if format == "svg":
        if spec_is_vega:
            svg = vlc.vega_to_svg(spec, **locales)
        else:
            svg = vlc.vegalite_to_svg(spec, vl_version=vl_version, **locales)
        return {"image/svg+xml": svg}

    if format == "png":
        scale = kwargs.get("scale_factor", 1)
        # The default ppi for a PNG file is 72
        default_ppi = 72
        ppi = kwargs.get("ppi", default_ppi)
        if spec_is_vega:
            png = vlc.vega_to_png(spec, scale=scale, ppi=ppi, **locales)
        else:
            png = vlc.vegalite_to_png(
                spec, vl_version=vl_version, scale=scale, ppi=ppi, **locales
            )
        # Report dimensions relative to the default ppi.
        factor = ppi / default_ppi
        width, height = _pngxy(png)
        metadata = {"image/png": {"width": width / factor, "height": height / factor}}
        return {"image/png": png}, metadata

    if format == "pdf":
        scale = kwargs.get("scale_factor", 1)
        if spec_is_vega:
            pdf = vlc.vega_to_pdf(spec, scale=scale, **locales)
        else:
            pdf = vlc.vegalite_to_pdf(
                spec, vl_version=vl_version, scale=scale, **locales
            )
        return {"application/pdf": pdf}

    # This should be validated above
    # but raise exception for the sake of future development
    msg = f"Unexpected format {format!r}"
    raise ValueError(msg)
def _validate_normalize_engine(
    engine: Literal["vl-convert"] | None,
    format: Literal["png", "svg", "pdf", "vega"],
) -> str:
    """
    Validate the user-provided engine and return its normalized name.

    engine : {None, 'vl-convert'}
        the user-provided engine string; ``None`` selects vl-convert when it
        can be imported
    format : string {'png', 'svg', 'pdf', 'vega'}
        the format of the mimebundle to be returned (only used in the error
        message)
    """
    # Probe for vl_convert first; the result is needed on every path.
    try:
        vlc = import_vl_convert()
    except ImportError:
        vlc = None
    vlc_available = vlc is not None

    if engine is None:
        # No explicit request: fall back to vl-convert if it is installed.
        if not vlc_available:
            msg = (
                f"Saving charts in {format!r} format requires the vl-convert-python package: "
                "see https://altair-viz.github.io/user_guide/saving_charts.html#png-svg-and-pdf-format"
            )
            raise ValueError(msg)
        return "vlconvert"

    # Normalize engine string by lower casing and removing underscores and hyphens
    normalized_engine = engine.lower().replace("-", "").replace("_", "")
    if normalized_engine != "vlconvert":
        msg = f"Invalid conversion engine {engine!r}. Expected vl-convert"
        raise ValueError(msg)
    if not vlc_available:
        msg = "The 'vl-convert' conversion engine requires the vl-convert-python package"
        raise ValueError(msg)
    return normalized_engine
def _pngxy(data):
    """
    Return the ``(width, height)`` stored in a PNG header.

    Taken from IPython.display
    """
    # Width and height are the two big-endian 4-byte integers that directly
    # follow the 4-byte "IHDR" marker.
    dimensions_offset = data.index(b"IHDR") + 4
    return struct.unpack_from(">ii", data, dimensions_offset)
def preprocess_embed_options(embed_options: dict) -> dict:
    """
    Return a copy of the embed options in a form compatible with Vega Embed.

    Parameters
    ----------
    embed_options : dict
        The embed options dictionary to preprocess. It is not modified.

    Returns
    -------
    embed_opts : dict
        The preprocessed embed options dictionary. Locale entries given as
        strings are replaced by the locale objects looked up through
        vl-convert.
    """
    processed = embed_options.copy() if embed_options else {}

    # Locale names (strings) are resolved via vl-convert; any other value is
    # passed through as-is.
    number_locale = processed.get("formatLocale", None)
    if isinstance(number_locale, str):
        processed["formatLocale"] = import_vl_convert().get_format_locale(
            number_locale
        )

    time_locale = processed.get("timeFormatLocale", None)
    if isinstance(time_locale, str):
        processed["timeFormatLocale"] = import_vl_convert().get_time_format_locale(
            time_locale
        )

    return processed

View File

@@ -0,0 +1,290 @@
from __future__ import annotations
import sys
from functools import partial
from importlib.metadata import entry_points
from typing import TYPE_CHECKING, Any, Callable, Generic, TypeVar, cast
from altair.utils.deprecation import deprecated_warn
if sys.version_info >= (3, 13):
from typing import TypeIs
else:
from typing_extensions import TypeIs
if sys.version_info >= (3, 12):
from typing import TypeAliasType
else:
from typing_extensions import TypeAliasType
if TYPE_CHECKING:
from types import TracebackType
# ``T`` is the type checked by ``_is_type``; ``R`` is the return type of a
# plugin callable.
T = TypeVar("T")
R = TypeVar("R")
# Generic alias: a plugin is any callable returning ``R``.
Plugin = TypeAliasType("Plugin", Callable[..., R], type_params=(R,))
# The concrete plugin type a registry stores; bound to ``Plugin``.
PluginT = TypeVar("PluginT", bound=Plugin[Any])
# Signature of the guard function that narrows an object to a plugin.
IsPlugin = Callable[[object], TypeIs[Plugin[Any]]]
def _is_type(tp: type[T], /) -> Callable[[object], TypeIs[type[T]]]:
    """
    Wrap a type in a guard function performing an ``isinstance`` check.

    Added for compatibility with original `PluginRegistry` default.
    """

    def guard(candidate: object, /) -> TypeIs[type[T]]:
        return isinstance(candidate, tp)

    return guard
class NoSuchEntryPoint(Exception):
    """Raised when no entry point called ``name`` exists in ``group``."""

    def __init__(self, group, name):
        self.group, self.name = group, name

    def __str__(self):
        return "No {!r} entry point found in group {!r}".format(self.name, self.group)
class PluginEnabler(Generic[PluginT, R]):
    """
    Context manager returned by ``enable()``.

    The plugin is enabled as soon as this object is created. Used in a
    ``with`` statement, the registry is put back into the state it had before
    on exit::

        with plugins.enable("name"):
            do_something()  # 'name' plugin temporarily enabled
        # plugins back to original state
    """

    def __init__(
        self, registry: PluginRegistry[PluginT, R], name: str, **options: Any
    ) -> None:
        self.registry: PluginRegistry[PluginT, R] = registry
        self.name: str = name
        self.options: dict[str, Any] = options
        # Snapshot first, so that the state restored on exit is the one from
        # before the plugin got enabled.
        self.original_state: dict[str, Any] = registry._get_state()
        registry._enable(name, **options)

    def __enter__(self) -> PluginEnabler[PluginT, R]:
        return self

    def __exit__(self, typ: type, value: Exception, traceback: TracebackType) -> None:
        # Restore unconditionally, whether or not the block raised.
        self.registry._set_state(self.original_state)

    def __repr__(self) -> str:
        registry_name = type(self.registry).__name__
        return f"{registry_name}.enable({self.name!r})"
class PluginRegistry(Generic[PluginT, R]):
    """
    A registry of named plugins, one of which can be active at a time.

    Plugins can be loaded/registered in two ways:

    1. Through an explicit call to ``.register(name, value)``.
    2. By looking for other Python packages that are installed and provide
       a setuptools entry point group.

    When you create an instance of this class, provide the name of the
    entry point group to use::

        reg = PluginRegistry("my_entrypoint_group")
    """

    # Mapping of plugin name to a custom error message, used when no entry
    # point with that name is found.
    entrypoint_err_messages: dict[str, str] = {}

    # Key-value settings that are stored globally in the registry rather than
    # passed to the plugins.
    _global_settings: dict[str, Any] = {}

    def __init__(
        self, entry_point_group: str = "", plugin_type: IsPlugin = callable
    ) -> None:
        """
        Create a PluginRegistry for a named entry point group.

        Parameters
        ----------
        entry_point_group: str
            The name of the entry point group.
        plugin_type
            A type narrowing function that will optionally be used for runtime
            type checking loaded plugins. Passing a class instead is
            deprecated; it is converted into an ``isinstance`` guard.

        References
        ----------
        https://typing.readthedocs.io/en/latest/spec/narrowing.html
        """
        self.entry_point_group: str = entry_point_group
        self.plugin_type: IsPlugin
        given_a_class = plugin_type is not callable and isinstance(plugin_type, type)
        if not given_a_class:
            self.plugin_type = plugin_type
        else:
            msg: Any = (
                f"Pass a callable `TypeIs` function to `plugin_type` instead.\n"
                f"{type(self).__name__!r}(plugin_type)\n\n"
                f"See also:\n"
                f"https://typing.readthedocs.io/en/latest/spec/narrowing.html\n"
                f"https://docs.astral.sh/ruff/rules/assert/"
            )
            deprecated_warn(msg, version="5.4.0")
            self.plugin_type = cast(IsPlugin, _is_type(plugin_type))
        self._active: Plugin[R] | None = None
        self._active_name: str = ""
        self._plugins: dict[str, PluginT] = {}
        self._options: dict[str, Any] = {}
        # Each instance starts from its own copy of the class-level defaults.
        self._global_settings: dict[str, Any] = self.__class__._global_settings.copy()

    def register(self, name: str, value: PluginT | None) -> PluginT | None:
        """
        Register a plugin by name and value.

        This method is used for explicit registration of a plugin and shouldn't be
        used to manage entry point managed plugins, which are auto-loaded.

        Parameters
        ----------
        name: str
            The name of the plugin.
        value: PluginType or None
            The actual plugin object to register or None to unregister that plugin.

        Returns
        -------
        plugin: PluginType or None
            The plugin that was registered or unregistered.

        Raises
        ------
        TypeError
            If ``value`` is rejected by the registry's ``plugin_type`` guard.
        """
        if value is None:
            # Unregistering an unknown name is not an error.
            return self._plugins.pop(name, None)
        if not self.plugin_type(value):
            msg = f"{type(value).__name__!r} is not compatible with {type(self).__name__!r}"
            raise TypeError(msg)
        self._plugins[name] = value
        return value

    def names(self) -> list[str]:
        """List the names of the registered and entry points plugins."""
        registered = set(self._plugins)
        discovered = {
            ep.name for ep in importlib_metadata_get(self.entry_point_group)
        }
        return sorted(registered | discovered)

    def _get_state(self) -> dict[str, Any]:
        """Return a dictionary representing the current state of the registry."""
        return dict(
            _active=self._active,
            _active_name=self._active_name,
            _plugins=self._plugins.copy(),
            _options=self._options.copy(),
            _global_settings=self._global_settings.copy(),
        )

    def _set_state(self, state: dict[str, Any]) -> None:
        """Reset the state of the registry."""
        expected_keys = {
            "_active",
            "_active_name",
            "_plugins",
            "_options",
            "_global_settings",
        }
        assert set(state) == expected_keys
        for attribute, saved in state.items():
            setattr(self, attribute, saved)

    def _enable(self, name: str, **options) -> None:
        """Make ``name`` the active plugin, loading it from an entry point if needed."""
        if name not in self._plugins:
            try:
                # Unpacking fails unless exactly one entry point matches.
                (entry_point,) = [
                    ep
                    for ep in importlib_metadata_get(self.entry_point_group)
                    if ep.name == name
                ]
            except ValueError as err:
                custom_messages = self.entrypoint_err_messages
                if name not in custom_messages:
                    raise NoSuchEntryPoint(self.entry_point_group, name) from err
                raise ValueError(custom_messages[name]) from err
            self.register(name, cast(PluginT, entry_point.load()))
        self._active_name = name
        self._active = self._plugins[name]
        # Options naming a global setting update the registry itself; the
        # remaining ones are kept for the plugin.
        for key in options.keys() & self._global_settings.keys():
            self._global_settings[key] = options.pop(key)
        self._options = options

    def enable(
        self, name: str | None = None, **options: Any
    ) -> PluginEnabler[PluginT, R]:
        """
        Enable a plugin by name.

        This can be either called directly, or used as a context manager.

        Parameters
        ----------
        name : string (optional)
            The name of the plugin to enable. If not specified, then use the
            current active name.
        **options :
            Any additional parameters will be passed to the plugin as keyword
            arguments

        Returns
        -------
        PluginEnabler:
            An object that allows enable() to be used as a context manager
        """
        target = self.active if name is None else name
        return PluginEnabler(self, target, **options)

    @property
    def active(self) -> str:
        """Return the name of the currently active plugin."""
        return self._active_name

    @property
    def options(self) -> dict[str, Any]:
        """Return the current options dictionary."""
        return self._options

    def get(self) -> partial[R] | Plugin[R] | None:
        """Return the currently active plugin, with the current options bound if any."""
        active = self._active
        if active and self.plugin_type(active):
            if self._options:
                return partial(active, **self._options)
            return active
        if active is not None:
            msg = (
                f"{type(self).__name__!r} requires all plugins to be callable objects, "
                f"but {type(active).__name__!r} is not callable."
            )
            raise TypeError(msg)
        if TYPE_CHECKING:
            # NOTE: The `None` return is implicit, but `mypy` isn't satisfied
            # - `ruff` will factor out explicit `None` return
            # - `pyright` has no issue
            raise NotImplementedError

    def __repr__(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}(active={self.active!r}, registered={self.names()!r})"
def importlib_metadata_get(group):
    """Return the installed entry points that belong to ``group``."""
    all_entry_points = entry_points()
    # 'select' was introduced in Python 3.10 and 'get' got deprecated
    # We don't check for Python version here as by checking with hasattr we
    # also get compatibility with the importlib_metadata package which had a different
    # deprecation cycle for 'get'
    if not hasattr(all_entry_points, "select"):
        return all_entry_points.get(group, [])
    return all_entry_points.select(group=group)  # pyright: ignore

View File

@@ -0,0 +1,224 @@
from __future__ import annotations
import json
import pathlib
import warnings
from typing import IO, TYPE_CHECKING, Any, Literal
from altair.utils._vegafusion_data import using_vegafusion
from altair.utils.deprecation import deprecated_warn
from altair.vegalite.v5.data import data_transformers
from .mimebundle import spec_to_mimebundle
if TYPE_CHECKING:
from pathlib import Path
def write_file_or_filename(
    fp: str | Path | IO,
    content: str | bytes,
    mode: str = "w",
    encoding: str | None = None,
) -> None:
    """
    Write content to fp, whether fp is a string, a pathlib Path or a file-like object.

    ``mode`` and ``encoding`` are only used when ``fp`` names a file that has
    to be opened here; a file-like object is written to as it is.
    """
    if not isinstance(fp, (str, pathlib.Path)):
        fp.write(content)
        return
    with pathlib.Path(fp).open(mode=mode, encoding=encoding) as handle:
        handle.write(content)
def set_inspect_format_argument(
    format: str | None, fp: str | Path | IO, inline: bool
) -> str:
    """
    Resolve the format argument of the save function.

    A missing format is taken from the file extension of ``fp``; this is only
    possible when ``fp`` is a string or a path. A warning is issued when
    ``inline`` is set for a format other than "html".
    """
    if format is None:
        if not isinstance(fp, (str, pathlib.Path)):
            msg = (
                "must specify file format: "
                "['png', 'svg', 'pdf', 'html', 'json', 'vega']"
            )
            raise ValueError(msg)
        # Extension without its leading dot, e.g. "chart.png" -> "png".
        format = pathlib.Path(fp).suffix.lstrip(".")

    if inline and format != "html":
        warnings.warn("inline argument ignored for non HTML formats.", stacklevel=1)

    return format
def set_inspect_mode_argument(
    mode: Literal["vega-lite"] | None,
    embed_options: dict[str, Any],
    spec: dict[str, Any],
    vegalite_version: str | None,
) -> Literal["vega-lite"]:
    """
    Resolve and validate the mode argument of the save function.

    When ``mode`` is None it is taken from ``embed_options["mode"]``, else
    from the second-to-last path segment of the spec's "$schema" URL, else it
    defaults to "vega-lite". Anything but "vega-lite" is rejected, and a
    vega-lite version must be given.
    """
    resolved = mode
    if resolved is None:
        if "mode" in embed_options:
            resolved = embed_options["mode"]
        elif "$schema" in spec:
            resolved = spec["$schema"].split("/")[-2]
        else:
            resolved = "vega-lite"

    if resolved != "vega-lite":
        msg = f"mode must be 'vega-lite', not '{resolved}'"
        raise ValueError(msg)

    if vegalite_version is None:
        msg = "must specify vega-lite version"
        raise ValueError(msg)

    return resolved
def save(
    chart,
    fp: str | Path | IO,
    vega_version: str | None,
    vegaembed_version: str | None,
    format: Literal["json", "html", "png", "svg", "pdf", "vega"] | None = None,
    mode: Literal["vega-lite"] | None = None,
    vegalite_version: str | None = None,
    embed_options: dict | None = None,
    json_kwds: dict | None = None,
    scale_factor: float = 1,
    engine: Literal["vl-convert"] | None = None,
    inline: bool = False,
    **kwargs,
) -> None:
    """
    Save a chart to file in a variety of formats.

    Supported formats are [json, html, png, svg, pdf, vega]

    Parameters
    ----------
    chart : alt.Chart
        the chart instance to save
    fp : string filename, pathlib.Path or file-like object
        file to which to write the chart.
    vega_version : string
        For html output, the version of vega.js to use
    vegaembed_version : string
        For html output, the version of vegaembed.js to use
    format : string (optional)
        the format to write: one of ['json', 'html', 'png', 'svg', 'pdf', 'vega'].
        If not specified, the format will be determined from the filename.
    mode : string (optional)
        Must be 'vega-lite'. If not specified, then infer the mode from
        the '$schema' property of the spec, or the ``opt`` dictionary.
        If it's not specified in either of those places, then use 'vega-lite'.
    vegalite_version : string (optional)
        For html output, the version of vegalite.js to use
    embed_options : dict (optional)
        The vegaEmbed options dictionary. Default is {}
        (See https://github.com/vega/vega-embed for details)
    json_kwds : dict (optional)
        Additional keyword arguments are passed to the output method
        associated with the specified format.
    scale_factor : float (optional)
        scale_factor to use to change size/resolution of png or svg output
    engine: string {'vl-convert'}
        the conversion engine to use for 'png', 'svg', 'pdf', and 'vega' formats
    inline: bool (optional)
        If False (default), the required JavaScript libraries are loaded
        from a CDN location in the resulting html file.
        If True, the required JavaScript libraries are inlined into the resulting
        html file so that it will work without an internet connection.
        The vl-convert-python package is required if True.
    **kwargs :
        additional kwargs passed to spec_to_mimebundle. ``encoding`` (default
        "utf-8") is consumed here and used when writing text output;
        ``webdriver`` is accepted but deprecated and ignored.

    Raises
    ------
    ValueError
        If the format cannot be determined or is not supported, or if the mode
        is invalid.
    """
    if _ := kwargs.pop("webdriver", None):
        deprecated_warn(
            "The webdriver argument is not relevant for the new vl-convert engine which replaced altair_saver. "
            "The argument will be removed in a future release.",
            version="5.0.0",
        )

    json_kwds = json_kwds or {}
    # ``encoding`` only concerns how the file is written here. It has to be
    # removed from kwargs, because the remaining kwargs are forwarded and
    # spec_to_html does not accept an ``encoding`` argument.
    encoding = kwargs.pop("encoding", "utf-8")
    format = set_inspect_format_argument(format, fp, inline)  # type: ignore[assignment]

    def perform_save() -> None:
        spec = chart.to_dict(context={"pre_transform": False})

        inner_mode = set_inspect_mode_argument(
            mode, embed_options or {}, spec, vegalite_version
        )

        if format == "json":
            json_spec = json.dumps(spec, **json_kwds)
            write_file_or_filename(fp, json_spec, mode="w", encoding=encoding)
        elif format == "html":
            if inline:
                kwargs["template"] = "inline"
            mb_html = spec_to_mimebundle(
                spec=spec,
                format=format,
                mode=inner_mode,
                vega_version=vega_version,
                vegalite_version=vegalite_version,
                vegaembed_version=vegaembed_version,
                embed_options=embed_options,
                json_kwds=json_kwds,
                **kwargs,
            )
            write_file_or_filename(
                fp, mb_html["text/html"], mode="w", encoding=encoding
            )
        elif format == "png":
            mb_png = spec_to_mimebundle(
                spec=spec,
                format=format,
                mode=inner_mode,
                vega_version=vega_version,
                vegalite_version=vegalite_version,
                vegaembed_version=vegaembed_version,
                embed_options=embed_options,
                scale_factor=scale_factor,
                engine=engine,
                **kwargs,
            )
            # For png the mimebundle is a (data, metadata) tuple.
            write_file_or_filename(fp, mb_png[0]["image/png"], mode="wb")
        elif format in {"svg", "pdf", "vega"}:
            mb_any = spec_to_mimebundle(
                spec=spec,
                format=format,
                mode=inner_mode,
                vega_version=vega_version,
                vegalite_version=vegalite_version,
                vegaembed_version=vegaembed_version,
                embed_options=embed_options,
                scale_factor=scale_factor,
                engine=engine,
                **kwargs,
            )
            if format == "pdf":
                write_file_or_filename(fp, mb_any["application/pdf"], mode="wb")
            elif format == "vega":
                # The vega bundle holds the compiled spec as a dict under its
                # own MIME type; it is written out as JSON text.
                vega_spec = mb_any["application/vnd.vega.v5+json"]
                write_file_or_filename(
                    fp,
                    json.dumps(vega_spec, **json_kwds),
                    mode="w",
                    encoding=encoding,
                )
            else:
                write_file_or_filename(
                    fp, mb_any["image/svg+xml"], mode="w", encoding=encoding
                )
        else:
            msg = f"Unsupported format: '{format}'"
            raise ValueError(msg)

    if using_vegafusion():
        # When the vegafusion data transformer is enabled, transforms will be
        # evaluated during save and the resulting data will be included in the
        # vega specification that is saved.
        with data_transformers.disable_max_rows():
            perform_save()
    else:
        # Temporarily turn off any data transformers so that all data is inlined
        # when calling chart.to_dict. This is relevant for vl-convert which cannot access
        # local json files which could be created by a json data transformer. Furthermore,
        # we don't exit the with statement until this function completed due to the issue
        # described at https://github.com/vega/vl-convert/issues/31
        with data_transformers.enable("default"), data_transformers.disable_max_rows():
            perform_save()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,130 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, NewType
# Type representing the "{selection}_store" dataset that corresponds to a
# Vega-Lite selection
Store = NewType("Store", list[dict[str, Any]])
@dataclass(frozen=True, eq=True)
class IndexSelection:
    """
    State of an alt.selection_point() created without the fields or encodings arguments.

    The value field is a list of zero-based indices into the
    selected dataset.

    Note: These indices only apply to the input DataFrame
    for charts that do not include aggregations (e.g. a scatter chart).
    """

    name: str
    value: list[int]
    store: Store

    @staticmethod
    def from_vega(name: str, signal: dict[str, dict] | None, store: Store):
        """
        Construct an IndexSelection from the raw Vega signal and dataset values.

        Parameters
        ----------
        name: str
            The selection's name
        signal: dict or None
            The value of the Vega signal corresponding to the selection
        store: list
            The value of the Vega dataset corresponding to the selection.
            This dataset is named "{name}_store" in the Vega view.

        Returns
        -------
        IndexSelection
        """
        points = [] if signal is None else signal.get("vlPoint", {}).get("or", [])
        # "_vgsid_" is shifted down by one to obtain zero-based indices.
        zero_based = [point["_vgsid_"] - 1 for point in points]
        return IndexSelection(name=name, value=zero_based, store=store)
@dataclass(frozen=True, eq=True)
class PointSelection:
    """
    State of an ``alt.selection_point()`` created with ``fields`` or ``encodings``.

    ``value`` is a list of dicts of the form:

        [{"dim1": 1, "dim2": "A"}, {"dim1": 2, "dim2": "BB"}]

    where "dim1" and "dim2" are dataset columns and the dict values
    correspond to the specific selected values.
    """

    name: str
    value: list[dict[str, Any]]
    store: Store

    @staticmethod
    def from_vega(name: str, signal: dict[str, dict] | None, store: Store):
        """
        Build a PointSelection from the raw Vega signal and dataset values.

        Parameters
        ----------
        name: str
            The selection's name
        signal: dict or None
            The value of the Vega signal corresponding to the selection.
            ``None`` is treated as "nothing selected".
        store: list
            The value of the Vega dataset corresponding to the selection.
            This dataset is named "{name}_store" in the Vega view.

        Returns
        -------
        PointSelection
        """
        if signal is None:
            selected_points = []
        else:
            # The selected points live under signal["vlPoint"]["or"]; either
            # level may be absent, in which case nothing is selected.
            selected_points = signal.get("vlPoint", {}).get("or", [])
        return PointSelection(name=name, value=selected_points, store=store)
@dataclass(frozen=True, eq=True)
class IntervalSelection:
    """
    Represents the state of an alt.selection_interval().

    The value field is a dict of the form:

        {"dim1": [0, 10], "dim2": ["A", "BB", "CCC"]}

    where "dim1" and "dim2" are dataset columns and the dict values
    correspond to the selected range.
    """

    name: str
    value: dict[str, list]
    store: Store

    @staticmethod
    def from_vega(name: str, signal: dict[str, list] | None, store: Store):
        """
        Construct an IntervalSelection from the raw Vega signal and dataset values.

        Parameters
        ----------
        name: str
            The selection's name
        signal: dict or None
            The value of the Vega signal corresponding to the selection.
            ``None`` is replaced by an empty dict (no interval selected).
        store: list
            The value of the Vega dataset corresponding to the selection.
            This dataset is named "{name}_store" in the Vega view.

        Returns
        -------
        IntervalSelection
        """
        # The signal dict is used as the selection value unchanged.
        value = {} if signal is None else signal
        return IntervalSelection(name=name, value=value, store=store)

View File

@@ -0,0 +1,151 @@
"""
A Simple server used to show altair graphics from a prompt or script.
This is adapted from the mpld3 package; see
https://github.com/mpld3/mpld3/blob/master/mpld3/_server.py
"""
import itertools
import random
import socket
import sys
import threading
import webbrowser
from http import server
from io import BytesIO as IO
# Notice printed by serve() when its jupyter_warning flag is set and the
# __IPYTHON__ name is defined in the running interpreter.
JUPYTER_WARNING = """
Note: if you're in the Jupyter notebook, Chart.serve() is not the best
way to view plots. Consider using Chart.display().
You must interrupt the kernel to cancel this command.
"""
# Mock server used for testing
class MockRequest:
    """Socket stand-in that always presents a bare ``GET /`` request."""

    def makefile(self, *args, **kwargs):
        """Return a fresh in-memory byte stream holding the request line."""
        request_line = b"GET /"
        return IO(request_line)

    def sendall(self, response):
        """Accept the response bytes and discard them."""
        return None
class MockServer:
    """Server stand-in that runs the handler once against a MockRequest."""

    def __init__(self, ip_port, Handler):
        # Instantiating the handler is the whole "request": it is given a
        # MockRequest, the first element of ip_port as the client address,
        # and this object as the server.
        fake_request = MockRequest()
        client_address = ip_port[0]
        Handler(fake_request, client_address, self)

    def serve_forever(self):
        """Do nothing; the single request was already handled in __init__."""
        return None

    def server_close(self):
        """Do nothing; there is no real socket to release."""
        return None
def generate_handler(html, files=None):
    """
    Build a request-handler class that serves ``html`` and optional extra files.

    Parameters
    ----------
    html : str or bytes
        Body returned for a GET request to ``/`` (served as ``text/html``).
    files : dict, optional
        Mapping of request path to a ``(content_type, content)`` pair.
        ``content`` may be ``str`` (encoded with the default codec) or
        ``bytes``/``bytearray`` (sent unchanged, e.g. for binary assets).

    Returns
    -------
    type
        A ``BaseHTTPRequestHandler`` subclass. Any other path gets a 404.
    """
    extra_files = {} if files is None else files

    def _as_bytes(payload):
        # Binary payloads are already wire-ready; only text needs encoding.
        if isinstance(payload, (bytes, bytearray)):
            return payload
        return payload.encode()

    class MyHandler(server.BaseHTTPRequestHandler):
        def do_GET(self):
            """Respond to a GET request."""
            if self.path == "/":
                content_type, content = "text/html", html
            elif self.path in extra_files:
                content_type, content = extra_files[self.path]
            else:
                self.send_error(404)
                return
            self.send_response(200)
            self.send_header("Content-type", content_type)
            self.end_headers()
            self.wfile.write(_as_bytes(content))

    return MyHandler
def find_open_port(ip, port, n=50):
    """
    Find an open port near the specified port.

    First tries ``port``, ``port + 1``, ..., ``port + n - 1`` in order, then
    ``n`` random candidates within ``2 * n`` of ``port``. A port counts as
    open when a TCP connection attempt to ``(ip, port)`` fails, i.e. nothing
    accepted the connection.

    Parameters
    ----------
    ip : str
        Address to probe.
    port : int
        Preferred port; the search starts here.
    n : int (default = 50)
        Number of sequential and of random candidates to try.

    Returns
    -------
    int
        The first candidate port on which the connection attempt failed.

    Raises
    ------
    ValueError
        If every candidate is in use or outside the valid port range.
    """
    sequential = (port + offset for offset in range(n))
    # Both arguments to chain() must be iterables; a lone parenthesized int
    # would only fail (TypeError) once the sequential candidates ran out.
    scattered = (port + random.randint(-2 * n, 2 * n) for _ in range(n))
    for candidate in itertools.chain(sequential, scattered):
        # Skip numbers that are not valid TCP ports rather than letting
        # connect_ex() raise OverflowError.
        if not 0 <= candidate <= 65535:
            continue
        # The context manager closes the probe socket even if connect_ex raises.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            status = probe.connect_ex((ip, candidate))
        # connect_ex() returns 0 only when the connection succeeded.
        if status != 0:
            return candidate
    msg = "no open ports found"
    raise ValueError(msg)
def serve(
    html,
    ip="127.0.0.1",
    port=8888,
    n_retries=50,
    files=None,
    jupyter_warning=True,
    open_browser=True,
    http_server=None,
) -> None:
    """
    Start a server serving the given HTML, and (optionally) open a browser.

    Blocks until the server is interrupted (KeyboardInterrupt or SystemExit).
    The server is always closed before returning, including when
    ``serve_forever`` fails with an unexpected exception.

    Parameters
    ----------
    html : string
        HTML to serve
    ip : string (default = '127.0.0.1')
        ip address at which the HTML will be served.
    port : int (default = 8888)
        the port at which to serve the HTML
    n_retries : int (default = 50)
        the number of nearby ports to search if the specified port is in use.
    files : dictionary (optional)
        dictionary of extra content to serve
    jupyter_warning : bool (optional)
        if True (default), then print a warning if this is used within Jupyter
    open_browser : bool (optional)
        if True (default), then open a web browser to the given HTML
    http_server : class (optional)
        optionally specify an HTTPServer class to use for showing the
        figure. The default is Python's basic HTTPServer.
    """
    port = find_open_port(ip, port, n_retries)
    Handler = generate_handler(html, files)

    server_class = server.HTTPServer if http_server is None else http_server
    srvr = server_class((ip, port), Handler)

    if jupyter_warning:
        # __IPYTHON__ is only defined when running under IPython/Jupyter;
        # the NameError path is the plain-interpreter case.
        try:
            __IPYTHON__  # type: ignore # noqa
        except NameError:
            pass
        else:
            print(JUPYTER_WARNING)

    # Start the server
    print(f"Serving to http://{ip}:{port}/ [Ctrl-C to exit]")
    sys.stdout.flush()

    if open_browser:
        # Use a thread to open a web browser pointing to the server, so the
        # browser launch does not delay serve_forever() below.
        def b():
            return webbrowser.open(f"http://{ip}:{port}")

        threading.Thread(target=b).start()

    try:
        srvr.serve_forever()
    except (KeyboardInterrupt, SystemExit):
        print("\nstopping Server...")
    finally:
        # Release the listening socket on every exit path, not only after a
        # clean interrupt.
        srvr.server_close()