Mise à jour de Monitor.py et autres scripts

This commit is contained in:
Debian
2025-07-23 10:46:27 +02:00
parent 7081418ce0
commit 7de3e0fb50
8604 changed files with 2789953 additions and 295 deletions

View File

@@ -0,0 +1,28 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from streamlit.connections.base_connection import BaseConnection
from streamlit.connections.snowflake_connection import SnowflakeConnection
from streamlit.connections.snowpark_connection import SnowparkConnection
from streamlit.connections.sql_connection import SQLConnection
# Alias bound to the very same class object as BaseConnection (presumably kept so
# that code written against the older "Experimental" name keeps importing).
ExperimentalBaseConnection = BaseConnection
# Public names exported by this package.
__all__ = [
    "BaseConnection",
    "SnowflakeConnection",
    "SnowparkConnection",
    "SQLConnection",
    "ExperimentalBaseConnection",
]

View File

@@ -0,0 +1,174 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import json
from abc import ABC, abstractmethod
from typing import Any, Generic, TypeVar
from streamlit.runtime.secrets import AttrDict, secrets_singleton
from streamlit.util import calc_md5
# Type of the underlying ("raw") connection object that a concrete connection wraps.
RawConnectionT = TypeVar("RawConnectionT")


class BaseConnection(ABC, Generic[RawConnectionT]):
    """The abstract base class that all Streamlit Connections must inherit from.

    This base class provides connection authors with a standardized way to hook into the
    ``st.connection()`` factory function: connection authors are required to provide an
    implementation for the abstract method ``_connect`` in their subclasses.

    Additionally, it also provides a few methods/properties designed to make
    implementation of connections more convenient. See the docstrings for each of the
    methods of this class for more information.

    .. note::
        While providing an implementation of ``_connect`` is technically all that's
        required to define a valid connection, connections should also provide the user
        with context-specific ways of interacting with the underlying connection object.
        For example, the first-party SQLConnection provides a ``query()`` method for
        reads and a ``session`` property for more complex operations.
    """

    def __init__(self, connection_name: str, **kwargs) -> None:
        """Create a BaseConnection.

        This constructor is called by the connection factory machinery when a user
        script calls ``st.connection()``.

        Subclasses of BaseConnection that want to overwrite this method should take care
        to also call the base class' implementation.

        Parameters
        ----------
        connection_name : str
            The name of this connection. This corresponds to the
            ``[connections.<connection_name>]`` config section in ``st.secrets``.
        kwargs : dict
            Any other kwargs to pass to this connection class' ``_connect`` method.

        Returns
        -------
        None
        """
        self._connection_name = connection_name
        self._kwargs = kwargs

        # Remember a fingerprint of this connection's secrets section so that
        # `_on_secrets_changed` can tell whether a secrets change concerns us.
        self._config_section_hash = calc_md5(json.dumps(self._secrets.to_dict()))
        secrets_singleton.file_change_listener.connect(self._on_secrets_changed)

        self._raw_instance: RawConnectionT | None = self._connect(**kwargs)

    def __del__(self) -> None:
        # Stop listening for secrets changes once this connection goes away.
        secrets_singleton.file_change_listener.disconnect(self._on_secrets_changed)

    def __getattribute__(self, name: str) -> Any:
        """Look up ``name``, hinting at ``._instance.<name>`` when it only exists there.

        Raises
        ------
        AttributeError
            If ``name`` does not exist on this object. When the underlying connection
            object has an attribute of that name, the message points the user to
            ``._instance.<name>``.
        """
        try:
            return object.__getattribute__(self, name)
        except AttributeError as e:
            # `_instance` is itself resolved through these names. Falling through to
            # the hint logic for them would call `self._instance` again and recurse
            # until RecursionError (e.g. for any failed lookup made before
            # `__init__` has assigned `_raw_instance`).
            if name in ("_instance", "_raw_instance", "_kwargs"):
                raise

            try:
                raw_instance = self._instance
            except AttributeError:
                # The connection isn't initialized far enough to offer a hint:
                # surface the original error unchanged.
                raise e from None

            if hasattr(raw_instance, name):
                raise AttributeError(
                    f"`{name}` doesn't exist here, but you can call `._instance.{name}` instead"
                ) from e
            raise

    # Methods with default implementations that we don't expect subclasses to want or
    # need to overwrite.
    def _on_secrets_changed(self, _) -> None:
        """Reset the raw connection object when this connection's secrets change.

        We don't expect either user scripts or connection authors to have to use or
        overwrite this method.
        """
        new_hash = calc_md5(json.dumps(self._secrets.to_dict()))

        # Only reset the connection if the secrets file section specific to this
        # connection has changed.
        if new_hash != self._config_section_hash:
            self._config_section_hash = new_hash
            self.reset()

    @property
    def _secrets(self) -> AttrDict:
        """Get the secrets for this connection from the corresponding st.secrets section.

        We expect this property to be used primarily by connection authors when they
        are implementing their class' ``_connect`` method. User scripts should, for the
        most part, have no reason to use this property.

        Returns
        -------
        AttrDict
            The ``[connections.<connection_name>]`` section, or an empty ``AttrDict``
            when there is no ``connections`` section or no entry for this connection.
        """
        connections_section = None
        if secrets_singleton.load_if_toml_exists():
            connections_section = secrets_singleton.get("connections")

        if not isinstance(connections_section, AttrDict):
            return AttrDict({})

        return connections_section.get(self._connection_name, AttrDict({}))

    def reset(self) -> None:
        """Reset this connection so that it gets reinitialized the next time it's used.

        This method can be useful when a connection has become stale, an auth token has
        expired, or in similar scenarios where a broken connection might be fixed by
        reinitializing it. Note that some connection methods may already use ``reset()``
        in their error handling code.

        Returns
        -------
        None

        Example
        -------
        >>> import streamlit as st
        >>>
        >>> conn = st.connection("my_conn")
        >>>
        >>> # Reset the connection before using it if it isn't healthy
        >>> # Note: is_healthy() isn't a real method and is just shown for example here.
        >>> if not conn.is_healthy():
        ...     conn.reset()
        >>>
        >>> # Do stuff with conn...
        """
        self._raw_instance = None

    @property
    def _instance(self) -> RawConnectionT:
        """Get an instance of the underlying connection, creating a new one if needed."""
        if self._raw_instance is None:
            # Reconnect lazily with the same kwargs the connection was created with.
            self._raw_instance = self._connect(**self._kwargs)

        return self._raw_instance

    # Abstract fields/methods that subclasses of BaseConnection must implement
    @abstractmethod
    def _connect(self, **kwargs) -> RawConnectionT:
        """Create an instance of an underlying connection object.

        This abstract method is the one method that we require subclasses of
        BaseConnection to provide an implementation for. It is called when first
        creating a connection and when reconnecting after a connection is reset.

        Parameters
        ----------
        kwargs : dict

        Returns
        -------
        RawConnectionT
            The underlying connection object.
        """
        raise NotImplementedError

View File

@@ -0,0 +1,561 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE: We won't always be able to import from snowflake.{connector, snowpark}.* so need
# the `type: ignore` comment below, but that comment will explode if `warn-unused-ignores`
# is turned on when the package is available. Unfortunately, mypy doesn't provide a good
# way to configure this at a per-line level :(
# mypy: no-warn-unused-ignores
from __future__ import annotations
from typing import TYPE_CHECKING, Final, cast
from streamlit import logger
from streamlit.connections import BaseConnection
from streamlit.connections.util import running_in_sis
from streamlit.errors import StreamlitAPIException
from streamlit.runtime.caching import cache_data
# Module-level logger, obtained from Streamlit's logger helper under this module's name.
_LOGGER: Final = logger.get_logger(__name__)
if TYPE_CHECKING:
from datetime import timedelta
from pandas import DataFrame
from snowflake.connector.cursor import SnowflakeCursor # type:ignore[import]
from snowflake.snowpark.session import Session # type:ignore[import]
from snowflake.connector import ( # type:ignore[import] # isort: skip
SnowflakeConnection as InternalSnowflakeConnection,
)
class SnowflakeConnection(BaseConnection["InternalSnowflakeConnection"]):
    """A connection to Snowflake using the Snowflake Connector for Python.

    Initialize this connection object using ``st.connection("snowflake")`` or
    ``st.connection("<name>", type="snowflake")``. Connection parameters for a
    SnowflakeConnection can be specified using ``secrets.toml`` and/or
    ``**kwargs``. Connection parameters are passed to
    |snowflake.connector.connect()|.

    When an app is running in Streamlit in Snowflake,
    ``st.connection("snowflake")`` connects automatically using the app owner's
    role without further configuration. ``**kwargs`` will be ignored in this
    case. Use ``secrets.toml`` and ``**kwargs`` to configure your connection
    for local development.

    SnowflakeConnection includes several convenience methods. For example, you
    can directly execute a SQL query with ``.query()`` or access the underlying
    Snowflake Connector object with ``.raw_connection``.

    .. |snowflake.connector.connect()| replace:: ``snowflake.connector.connect()``
    .. _snowflake.connector.connect(): https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#label-snowflake-connector-methods-connect

    .. Tip::
        `snowflake-snowpark-python <https://pypi.org/project/snowflake-snowpark-python/>`_
        must be installed in your environment to use this connection. You can
        install Snowflake extras along with Streamlit:

        >>> pip install streamlit[snowflake]

    .. Important::
        Account identifiers must be of the form ``<orgname>-<account_name>``
        where ``<orgname>`` is the name of your Snowflake organization and
        ``<account_name>`` is the unique name of your account within your
        organization. This is dash-separated, not dot-separated like when used
        in SQL queries. For more information, see `Account identifiers
        <https://docs.snowflake.com/en/user-guide/admin-account-identifier>`_.

    Examples
    --------

    **Example 1: Configuration with Streamlit secrets**

    You can configure your Snowflake connection using Streamlit's
    `Secrets management <https://docs.streamlit.io/develop/concepts/connections/secrets-management>`_.
    For example, if you have MFA enabled on your account, you can connect using
    `key-pair authentication <https://docs.snowflake.com/en/user-guide/key-pair-auth>`_.

    ``.streamlit/secrets.toml``:

    >>> [connections.snowflake]
    >>> account = "xxx-xxx"
    >>> user = "xxx"
    >>> private_key_file = "/xxx/xxx/xxx.p8"
    >>> role = "xxx"
    >>> warehouse = "xxx"
    >>> database = "xxx"
    >>> schema = "xxx"

    Your app code:

    >>> import streamlit as st
    >>> conn = st.connection("snowflake")
    >>> df = conn.query("SELECT * FROM my_table")

    **Example 2: Configuration with keyword arguments and external authentication**

    You can configure your Snowflake connection with keyword arguments. The
    keyword arguments are merged with (and take precedence over) the values in
    ``secrets.toml``. However, if you name your connection ``"snowflake"`` and
    don't have a ``[connections.snowflake]`` dictionary in your
    ``secrets.toml`` file, Streamlit will ignore any keyword arguments and use
    the default Snowflake connection as described in Example 5 and Example 6.
    To configure your connection using only keyword arguments, declare a name
    for the connection other than ``"snowflake"``.

    For example, if your Snowflake account supports SSO, you can set up a quick
    local connection for development using `browser-based SSO
    <https://docs.snowflake.com/en/user-guide/admin-security-fed-auth-use#how-browser-based-sso-works>`_.
    Because there is nothing configured in ``secrets.toml``, the name is an
    empty string and the type is set to ``"snowflake"``. This prevents
    Streamlit from ignoring the keyword arguments and using a default
    Snowflake connection.

    >>> import streamlit as st
    >>> conn = st.connection(
    ...     "",
    ...     type="snowflake",
    ...     account="xxx-xxx",
    ...     user="xxx",
    ...     authenticator="externalbrowser",
    ... )
    >>> df = conn.query("SELECT * FROM my_table")

    **Example 3: Named connection with Snowflake's connection configuration file**

    Snowflake's Python Connector supports a `connection configuration file
    <https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-connect#connecting-using-the-connections-toml-file>`_,
    which is well integrated with Streamlit's ``SnowflakeConnection``. If you
    already have one or more connections configured, all you need to do is pass
    the name of the connection to use.

    ``~/.snowflake/connections.toml``:

    >>> [my_connection]
    >>> account = "xxx-xxx"
    >>> user = "xxx"
    >>> password = "xxx"
    >>> warehouse = "xxx"
    >>> database = "xxx"
    >>> schema = "xxx"

    Your app code:

    >>> import streamlit as st
    >>> conn = st.connection("my_connection", type="snowflake")
    >>> df = conn.query("SELECT * FROM my_table")

    **Example 4: Named connection with Streamlit secrets and Snowflake's connection configuration file**

    If you have a Snowflake configuration file with a connection named
    ``my_connection`` as in Example 3, you can pass the connection name through
    ``secrets.toml``.

    ``.streamlit/secrets.toml``:

    >>> [connections.snowflake]
    >>> connection_name = "my_connection"

    Your app code:

    >>> import streamlit as st
    >>> conn = st.connection("snowflake")
    >>> df = conn.query("SELECT * FROM my_table")

    **Example 5: Default connection with an environment variable**

    If you don't have a ``[connections.snowflake]`` dictionary in your
    ``secrets.toml`` file and use ``st.connection("snowflake")``, Streamlit
    will use the default connection for the `Snowflake Python Connector
    <https://docs.snowflake.cn/en/developer-guide/python-connector/python-connector-connect#setting-a-default-connection>`_.

    If you have a Snowflake configuration file with a connection named
    ``my_connection`` as in Example 3, you can set an environment variable to
    declare it as the default Snowflake connection.

    >>> SNOWFLAKE_DEFAULT_CONNECTION_NAME = "my_connection"

    Your app code:

    >>> import streamlit as st
    >>> conn = st.connection("snowflake")
    >>> df = conn.query("SELECT * FROM my_table")

    **Example 6: Default connection in Snowflake's connection configuration file**

    If you have a Snowflake configuration file that defines your ``default``
    connection, Streamlit will automatically use it if no other connection is
    declared.

    ``~/.snowflake/connections.toml``:

    >>> [default]
    >>> account = "xxx-xxx"
    >>> user = "xxx"
    >>> password = "xxx"
    >>> warehouse = "xxx"
    >>> database = "xxx"
    >>> schema = "xxx"

    Your app code:

    >>> import streamlit as st
    >>> conn = st.connection("snowflake")
    >>> df = conn.query("SELECT * FROM my_table")

    """

    def _connect(self, **kwargs) -> InternalSnowflakeConnection:
        """Create the underlying Snowflake Connector connection.

        Parameters
        ----------
        kwargs : dict
            Connection parameters. They are merged over this connection's secrets
            when secrets exist, ignored when running in Streamlit in Snowflake or
            when the default ``"snowflake"`` connection is used without secrets.

        Returns
        -------
        snowflake.connector.connection.SnowflakeConnection
            The connection object.

        Raises
        ------
        StreamlitAPIException
            If the connector fails and neither secrets nor kwargs were provided.
        snowflake.connector.Error
            Any other connector failure is re-raised unchanged.
        """
        import snowflake.connector  # type:ignore[import]
        from snowflake.connector import Error as SnowflakeError  # type:ignore[import]

        # If we're running in SiS, just call get_active_session() and retrieve the
        # lower-level connection from it.
        if running_in_sis():
            from snowflake.snowpark.context import (  # type:ignore[import] # isort: skip
                get_active_session,
            )

            session = get_active_session()

            if hasattr(session, "connection"):
                return session.connection
            # session.connection is only a valid attr in more recent versions of
            # snowflake-connector-python, so we fall back to grabbing
            # session._conn._conn if `.connection` is unavailable.
            return session._conn._conn

        # We require qmark-style parameters everywhere for consistency across different
        # environments where SnowflakeConnections may be used.
        snowflake.connector.paramstyle = "qmark"

        # Read the secrets before entering the `try` so that the error handler below
        # can always refer to them.
        st_secrets = self._secrets.to_dict()

        # Otherwise, attempt to create a new connection from whatever credentials we
        # have available.
        try:
            if st_secrets:
                _LOGGER.info(
                    "Connect to Snowflake using the Streamlit secret defined under "
                    "[connections.%s].",
                    self._connection_name,
                )
                # kwargs take precedence over the values found in the secrets.
                conn_kwargs = {**st_secrets, **kwargs}
                return snowflake.connector.connect(**conn_kwargs)

            # Use the default configuration as defined in https://docs.snowflake.cn/en/developer-guide/python-connector/python-connector-connect#setting-a-default-connection
            if self._connection_name == "snowflake":
                _LOGGER.info(
                    "Connect to Snowflake using the default configuration as defined "
                    "in https://docs.snowflake.cn/en/developer-guide/python-connector/python-connector-connect#setting-a-default-connection"
                )
                return snowflake.connector.connect()

            return snowflake.connector.connect(**kwargs)
        except SnowflakeError as e:
            if not st_secrets and not kwargs:
                raise StreamlitAPIException(
                    "Missing Snowflake connection configuration. "
                    "Did you forget to set this in `secrets.toml`, a Snowflake configuration file, "
                    "or as kwargs to `st.connection`? "
                    "See the [SnowflakeConnection configuration documentation](https://docs.streamlit.io/st.connections.snowflakeconnection-configuration) "
                    "for more details and examples."
                ) from e
            raise

    def query(
        self,
        sql: str,
        *,  # keyword-only arguments:
        ttl: float | int | timedelta | None = None,
        show_spinner: bool | str = "Running `snowflake.query(...)`.",
        params=None,
        **kwargs,
    ) -> DataFrame:
        """Run a read-only SQL query.

        This method implements query result caching and simple error
        handling/retries. The caching behavior is identical to that of using
        ``@st.cache_data``. The cache key covers the SQL text as well as
        ``params`` and any additional keyword arguments, so the same statement
        run with different bind parameters is cached separately.

        .. note::
            Queries that are run without a specified ``ttl`` are cached
            indefinitely.

        Parameters
        ----------
        sql : str
            The read-only SQL query to execute.
        ttl : float, int, timedelta or None
            The maximum number of seconds to keep results in the cache. If this
            is ``None`` (default), cached results do not expire with time.
        show_spinner : boolean or string
            Whether to enable the spinner. When a cached query is executed, no
            spinner is displayed because the result is immediately available.
            When a new query is executed, the default is to show a spinner with
            the message "Running ``snowflake.query(...)``."

            If this is ``False``, no spinner displays while executing the
            query. If this is a string, the string will be used as the message
            for the spinner.
        params : list, tuple, dict or None
            List of parameters to pass to the Snowflake Connector for Python
            ``Cursor.execute()`` method. This connector supports binding data
            to a SQL statement using qmark bindings. For more information and
            examples, see the `Snowflake Connector for Python documentation
            <https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-example#using-qmark-or-numeric-binding>`_.
            This defaults to ``None``.
        **kwargs : Any
            Additional keyword arguments passed to ``Cursor.execute()``.

        Returns
        -------
        pandas.DataFrame
            The result of running the query, formatted as a pandas DataFrame.

        Example
        -------
        >>> import streamlit as st
        >>>
        >>> conn = st.connection("snowflake")
        >>> df = conn.query("SELECT * FROM my_table")
        >>> st.dataframe(df)

        """
        from tenacity import retry, retry_if_exception, stop_after_attempt, wait_fixed

        # the ANSI-compliant SQL code for "connection was not established" (see docs: https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#id6)
        SQLSTATE_CONNECTION_WAS_NOT_ESTABLISHED = "08001"

        @retry(
            after=lambda _: self.reset(),
            stop=stop_after_attempt(3),
            reraise=True,
            # We don't have to implement retries ourself for most error types as the
            # `snowflake-connector-python` library already implements retries for
            # retryable HTTP errors.
            retry=retry_if_exception(
                lambda e: hasattr(e, "sqlstate")
                and e.sqlstate == SQLSTATE_CONNECTION_WAS_NOT_ESTABLISHED
            ),
            wait=wait_fixed(1),
        )
        def _query(sql: str, params=None, **kwargs) -> DataFrame:
            # `params` and `kwargs` are real arguments (not closure variables) so that
            # `cache_data` includes them in the cache key. Otherwise the same SQL run
            # with different bind parameters would return the first cached result.
            cur = self._instance.cursor()
            cur.execute(sql, params=params, **kwargs)
            return cur.fetch_pandas_all()

        # We modify our helper function's `__qualname__` here to work around default
        # `@st.cache_data` behavior. Otherwise, `.query()` being called with different
        # `ttl` values will reset the cache with each call, and the query caches won't
        # be scoped by connection.
        ttl_str = str(  # Avoid adding extra `.` characters to `__qualname__`
            ttl
        ).replace(".", "_")
        _query.__qualname__ = f"{_query.__qualname__}_{self._connection_name}_{ttl_str}"
        _query = cache_data(
            show_spinner=show_spinner,
            ttl=ttl,
        )(_query)

        return _query(sql, params=params, **kwargs)

    def write_pandas(
        self,
        df: DataFrame,
        table_name: str,
        database: str | None = None,
        schema: str | None = None,
        chunk_size: int | None = None,
        **kwargs,
    ) -> tuple[bool, int, int]:
        """Write a ``pandas.DataFrame`` to a table in a Snowflake database.

        This convenience method is a thin wrapper around
        ``snowflake.connector.pandas_tools.write_pandas()`` using the
        underlying connection. The ``conn`` parameter is passed automatically.
        For more information and additional keyword arguments, see the
        `Snowflake Connector for Python documentation
        <https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#write_pandas>`_.

        Parameters
        ----------
        df: pandas.DataFrame
            The ``pandas.DataFrame`` object containing the data to be copied
            into the table.
        table_name: str
            Name of the table where the data should be copied to.
        database: str
            Name of the database containing the table. By default, the function
            writes to the database that is currently in use in the session.

            .. Note::
                If you specify this parameter, you must also specify the schema
                parameter.

        schema: str
            Name of the schema containing the table. By default, the function
            writes to the table in the schema that is currently in use in the
            session.
        chunk_size: int
            Number of elements to insert at a time. By default, the function
            inserts all elements in one chunk.
        **kwargs: Any
            Additional keyword arguments for
            ``snowflake.connector.pandas_tools.write_pandas()``.

        Returns
        -------
        tuple[bool, int, int]
            A tuple containing three values:

            1. A boolean value that is ``True`` if the write was successful.
            2. An integer giving the number of chunks of data that were copied.
            3. An integer giving the number of rows that were inserted.

        Example
        -------
        The following example uses the database and schema currently in use in
        the session and copies the data into a table named "my_table."

        >>> import streamlit as st
        >>> import pandas as pd
        >>>
        >>> df = pd.DataFrame(
        ...     {"Name": ["Mary", "John", "Robert"], "Pet": ["dog", "cat", "bird"]}
        ... )
        >>> conn = st.connection("snowflake")
        >>> conn.write_pandas(df, "my_table")

        """
        from snowflake.connector.pandas_tools import write_pandas  # type:ignore[import]

        # The connector returns a 4-tuple; the last element is not exposed here.
        success, nchunks, nrows, _ = write_pandas(
            conn=self._instance,
            df=df,
            table_name=table_name,
            database=database,
            schema=schema,
            chunk_size=chunk_size,
            **kwargs,
        )

        return (success, nchunks, nrows)

    def cursor(self) -> SnowflakeCursor:
        """Create a new cursor object from this connection.

        Snowflake Connector cursors implement the Python Database API v2.0
        specification (PEP-249). For more information, see the
        `Snowflake Connector for Python documentation
        <https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#object-cursor>`_.

        Returns
        -------
        snowflake.connector.cursor.SnowflakeCursor
            A cursor object for the connection.

        Example
        -------
        The following example uses a cursor to insert multiple rows into a
        table. The ``qmark`` parameter style is specified as an optional
        keyword argument. Alternatively, the parameter style can be declared in
        your connection configuration file. For more information, see the
        `Snowflake Connector for Python documentation
        <https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-example#using-qmark-or-numeric-binding>`_.

        >>> import streamlit as st
        >>>
        >>> conn = st.connection("snowflake", paramstyle="qmark")
        >>> rows_to_insert = [("Mary", "dog"), ("John", "cat"), ("Robert", "bird")]
        >>> conn.cursor().executemany(
        ...     "INSERT INTO mytable (name, pet) VALUES (?, ?)", rows_to_insert
        ... )

        """
        return self._instance.cursor()

    @property
    def raw_connection(self) -> InternalSnowflakeConnection:
        """Access the underlying connection object from the Snowflake Connector for Python.

        For information on how to use the Snowflake Connector for Python, see
        the `Snowflake Connector for Python documentation
        <https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-example>`_.

        Returns
        -------
        snowflake.connector.connection.SnowflakeConnection
            The connection object.

        Example
        -------
        The following example uses a cursor to submit an asynchronous query,
        saves the query ID, then periodically checks the query status through
        the connection before retrieving the results.

        >>> import streamlit as st
        >>> import time
        >>>
        >>> conn = st.connection("snowflake")
        >>> cur = conn.cursor()
        >>> cur.execute_async("SELECT * FROM my_table")
        >>> query_id = cur.sfqid
        >>> while True:
        ...     status = conn.raw_connection.get_query_status(query_id)
        ...     if conn.raw_connection.is_still_running(status):
        ...         time.sleep(1)
        ...     else:
        ...         break
        >>> cur.get_results_from_sfqid(query_id)
        >>> df = cur.fetchall()

        """
        return self._instance

    def session(self) -> Session:
        """Create a new Snowpark session from this connection.

        For information on how to use Snowpark sessions, see the
        `Snowpark developer guide
        <https://docs.snowflake.com/en/developer-guide/snowpark/python/working-with-dataframes>`_
        and `Snowpark API Reference
        <https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/snowpark/session>`_.

        Returns
        -------
        snowflake.snowpark.Session
            A new Snowpark session for this connection.

        Example
        -------
        The following example creates a new Snowpark session and uses it to run
        a query.

        >>> import streamlit as st
        >>>
        >>> conn = st.connection("snowflake")
        >>> session = conn.session()
        >>> df = session.sql("SELECT * FROM my_table").collect()

        """
        from snowflake.snowpark.context import get_active_session  # type:ignore[import]
        from snowflake.snowpark.session import Session  # type:ignore[import]

        # In SiS the already-active session is returned instead of building a new one.
        if running_in_sis():
            return get_active_session()

        return cast(
            "Session", Session.builder.configs({"connection": self._instance}).create()
        )

View File

@@ -0,0 +1,213 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE: We won't always be able to import from snowflake.snowpark.session so need the
# `type: ignore` comment below, but that comment will explode if `warn-unused-ignores` is
# turned on when the package is available. Unfortunately, mypy doesn't provide a good
# way to configure this at a per-line level :(
# mypy: no-warn-unused-ignores
from __future__ import annotations
import threading
from collections import ChainMap
from contextlib import contextmanager
from typing import TYPE_CHECKING, cast
from streamlit.connections import BaseConnection
from streamlit.connections.util import (
SNOWSQL_CONNECTION_FILE,
load_from_snowsql_config_file,
running_in_sis,
)
from streamlit.errors import StreamlitAPIException
from streamlit.runtime.caching import cache_data
if TYPE_CHECKING:
from collections.abc import Iterator
from datetime import timedelta
from pandas import DataFrame
from snowflake.snowpark.session import Session # type:ignore[import]
# Parameter names that must be present in the merged connection configuration;
# SnowparkConnection._connect raises a StreamlitAPIException for any that is missing.
_REQUIRED_CONNECTION_PARAMS = {"account"}
class SnowparkConnection(BaseConnection["Session"]):
"""A connection to Snowpark using snowflake.snowpark.session.Session. Initialize using
``st.connection("<name>", type="snowpark")``.
In addition to providing access to the Snowpark Session, SnowparkConnection supports
direct SQL querying using ``query("...")`` and thread safe access using
``with conn.safe_session():``. See methods below for more information.
SnowparkConnections should always be created using ``st.connection()``, **not**
initialized directly.
.. note::
We don't expect this iteration of SnowparkConnection to be able to scale
well in apps with many concurrent users due to the lock contention that will occur
over the single underlying Session object under high load.
"""
def __init__(self, connection_name: str, **kwargs) -> None:
self._lock = threading.RLock()
super().__init__(connection_name, **kwargs)
def _connect(self, **kwargs) -> Session:
from snowflake.snowpark.context import get_active_session # type:ignore[import]
from snowflake.snowpark.session import Session
# If we're running in SiS, just call get_active_session(). Otherwise, attempt to
# create a new session from whatever credentials we have available.
if running_in_sis():
return get_active_session()
conn_params = ChainMap(
kwargs,
self._secrets.to_dict(),
load_from_snowsql_config_file(self._connection_name),
)
if not len(conn_params):
raise StreamlitAPIException(
"Missing Snowpark connection configuration. "
f"Did you forget to set this in `secrets.toml`, `{SNOWSQL_CONNECTION_FILE}`, "
"or as kwargs to `st.connection`?"
)
for p in _REQUIRED_CONNECTION_PARAMS:
if p not in conn_params:
raise StreamlitAPIException(f"Missing Snowpark connection param: {p}")
return cast("Session", Session.builder.configs(conn_params).create())
def query(
self,
sql: str,
ttl: float | int | timedelta | None = None,
) -> DataFrame:
"""Run a read-only SQL query.
This method implements both query result caching (with caching behavior
identical to that of using ``@st.cache_data``) as well as simple error handling/retries.
.. note::
Queries that are run without a specified ttl are cached indefinitely.
Parameters
----------
sql : str
The read-only SQL query to execute.
ttl : float, int, timedelta or None
The maximum number of seconds to keep results in the cache, or
None if cached results should not expire. The default is None.
Returns
-------
pandas.DataFrame
The result of running the query, formatted as a pandas DataFrame.
Example
-------
>>> import streamlit as st
>>>
>>> conn = st.connection("snowpark")
>>> df = conn.query("SELECT * FROM pet_owners")
>>> st.dataframe(df)
"""
from snowflake.snowpark.exceptions import ( # type:ignore[import]
SnowparkServerException,
)
from tenacity import (
retry,
retry_if_exception_type,
stop_after_attempt,
wait_fixed,
)
@retry(
after=lambda _: self.reset(),
stop=stop_after_attempt(3),
reraise=True,
retry=retry_if_exception_type(SnowparkServerException),
wait=wait_fixed(1),
)
def _query(sql: str) -> DataFrame:
with self._lock:
return self._instance.sql(sql).to_pandas()
# We modify our helper function's `__qualname__` here to work around default
# `@st.cache_data` behavior. Otherwise, `.query()` being called with different
# `ttl` values will reset the cache with each call, and the query caches won't
# be scoped by connection.
ttl_str = str( # Avoid adding extra `.` characters to `__qualname__`
ttl
).replace(".", "_")
_query.__qualname__ = f"{_query.__qualname__}_{self._connection_name}_{ttl_str}"
_query = cache_data(
show_spinner="Running `snowpark.query(...)`.",
ttl=ttl,
)(_query)
return _query(sql)
@property
def session(self) -> Session:
    """Access the underlying Snowpark session.

    This returns the object stored in ``self._instance`` directly, without
    acquiring this connection's lock.

    .. note::
        Snowpark sessions are **not** thread safe. Users of this method are
        responsible for ensuring that access to the session returned by this method is
        done in a thread-safe manner. For most users, we recommend using the thread-safe
        safe_session() method and a ``with`` block.

    Information on how to use Snowpark sessions can be found in the `Snowpark documentation
    <https://docs.snowflake.com/en/developer-guide/snowpark/python/working-with-dataframes>`_.

    Returns
    -------
    Session
        The session object held by this connection.

    Example
    -------
    >>> import streamlit as st
    >>>
    >>> session = st.connection("snowpark").session
    >>> df = session.table("mytable").limit(10).to_pandas()
    >>> st.dataframe(df)

    """
    return self._instance
@contextmanager
def safe_session(self) -> Iterator[Session]:
    """Grab the underlying Snowpark session in a thread-safe manner.

    As operations on a Snowpark session are not thread safe, we need to take care
    when using a session in the context of a Streamlit app where each script run
    occurs in its own thread. Using the contextmanager pattern to do this ensures
    that access on this connection's underlying Session is done in a thread-safe
    manner.

    Information on how to use Snowpark sessions can be found in the `Snowpark documentation
    <https://docs.snowflake.com/en/developer-guide/snowpark/python/working-with-dataframes>`_.

    Yields
    ------
    Session
        The value of the ``session`` property, yielded while this connection's
        lock is held.

    Example
    -------
    >>> import streamlit as st
    >>>
    >>> conn = st.connection("snowpark")
    >>> with conn.safe_session() as session:
    ...     df = session.table("mytable").limit(10).to_pandas()
    >>>
    >>> st.dataframe(df)

    """
    # The yield sits inside the ``with`` block, so the lock stays held for the
    # whole body of the caller's ``with conn.safe_session()`` statement.
    with self._lock:
        yield self.session

View File

@@ -0,0 +1,424 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE: We ignore all mypy import-not-found errors as top-level since
# this module is optional and the SQLAlchemy dependency is not installed
# by default.
# mypy: disable-error-code="import-not-found, redundant-cast"
from __future__ import annotations
from collections import ChainMap
from copy import deepcopy
from typing import TYPE_CHECKING, cast
from streamlit.connections import BaseConnection
from streamlit.connections.util import extract_from_dict
from streamlit.errors import StreamlitAPIException
from streamlit.runtime.caching import cache_data
if TYPE_CHECKING:
from datetime import timedelta
from pandas import DataFrame
from sqlalchemy.engine import Connection as SQLAlchemyConnection
from sqlalchemy.engine.base import Engine
from sqlalchemy.orm import Session
# Names of the parameters that ``SQLConnection._connect`` extracts from its keyword
# arguments to build the SQLAlchemy URL. Keyword arguments not listed here are
# forwarded to ``sqlalchemy.create_engine()`` instead.
_ALL_CONNECTION_PARAMS = {
    "url",
    "driver",
    "dialect",
    "username",
    "password",
    "host",
    "port",
    "database",
    "query",
}
# Parameters that ``SQLConnection._connect`` requires when no ``url`` is given.
_REQUIRED_CONNECTION_PARAMS = {"dialect", "username", "host"}
class SQLConnection(BaseConnection["Engine"]):
    """A connection to a SQL database using a SQLAlchemy Engine.

    Initialize this connection object using ``st.connection("sql")`` or
    ``st.connection("<name>", type="sql")``. Connection parameters for a
    SQLConnection can be specified using ``secrets.toml`` and/or ``**kwargs``.
    Possible connection parameters include:

    - ``url`` or keyword arguments for |sqlalchemy.engine.URL.create()|_, except
      ``drivername``. Use ``dialect`` and ``driver`` instead of ``drivername``.
    - Keyword arguments for |sqlalchemy.create_engine()|_, including custom
      ``connect()`` arguments used by your specific ``dialect`` or ``driver``.
    - ``autocommit``. If this is ``False`` (default), the connection operates
      in manual commit (transactional) mode. If this is ``True``, the
      connection operates in autocommit (non-transactional) mode.

    If ``url`` exists as a connection parameter, Streamlit will pass it to
    |sqlalchemy.engine.make_url()|_. Otherwise, Streamlit requires (at a
    minimum) ``dialect``, ``username``, and ``host``. Streamlit will use
    ``dialect`` and ``driver`` (if defined) to derive ``drivername``, then pass
    the relevant connection parameters to ``sqlalchemy.engine.URL.create()``.

    In addition to the default keyword arguments for ``sqlalchemy.create_engine()``,
    your dialect may accept additional keyword arguments. For example, if you
    use ``dialect="snowflake"`` with `Snowflake SQLAlchemy
    <https://github.com/snowflakedb/snowflake-sqlalchemy#key-pair-authentication-support>`_,
    you can pass a value for ``private_key`` to use key-pair authentication. If
    you use ``dialect="bigquery"`` with `Google BigQuery
    <https://github.com/googleapis/python-bigquery-sqlalchemy#authentication>`_,
    you can pass a value for ``location``.

    SQLConnection provides the ``.query()`` convenience method, which can be
    used to run simple, read-only queries with both caching and simple error
    handling/retries. More complex database interactions can be performed by
    using the ``.session`` property to receive a regular SQLAlchemy Session.

    .. Important::
        `SQLAlchemy <https://pypi.org/project/SQLAlchemy/>`_ must be installed
        in your environment to use this connection. You must also install your
        driver, such as ``pyodbc`` or ``psycopg2``.

    .. |sqlalchemy.engine.URL.create()| replace:: ``sqlalchemy.engine.URL.create()``
    .. _sqlalchemy.engine.URL.create(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.engine.URL.create
    .. |sqlalchemy.engine.make_url()| replace:: ``sqlalchemy.engine.make_url()``
    .. _sqlalchemy.engine.make_url(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.engine.make_url
    .. |sqlalchemy.create_engine()| replace:: ``sqlalchemy.create_engine()``
    .. _sqlalchemy.create_engine(): https://docs.sqlalchemy.org/en/20/core/engines.html#sqlalchemy.create_engine

    Examples
    --------
    **Example 1: Configuration with URL**

    You can configure your SQL connection using Streamlit's
    `Secrets management <https://docs.streamlit.io/develop/concepts/connections/secrets-management>`_.
    The following example specifies a SQL connection URL.

    ``.streamlit/secrets.toml``:

    >>> [connections.sql]
    >>> url = "xxx+xxx://xxx:xxx@xxx:xxx/xxx"

    Your app code:

    >>> import streamlit as st
    >>>
    >>> conn = st.connection("sql")
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    **Example 2: Configuration with dialect, host, and username**

    If you do not specify ``url``, you must at least specify ``dialect``,
    ``host``, and ``username`` instead. The following example also includes
    ``password``.

    ``.streamlit/secrets.toml``:

    >>> [connections.sql]
    >>> dialect = "xxx"
    >>> host = "xxx"
    >>> username = "xxx"
    >>> password = "xxx"

    Your app code:

    >>> import streamlit as st
    >>>
    >>> conn = st.connection("sql")
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    **Example 3: Configuration with keyword arguments**

    You can configure your SQL connection with keyword arguments (with or
    without ``secrets.toml``). For example, if you use Microsoft Entra ID with
    a Microsoft Azure SQL server, you can quickly set up a local connection for
    development using `interactive authentication
    <https://learn.microsoft.com/en-us/sql/connect/odbc/using-azure-active-directory?view=sql-server-ver16#new-andor-modified-dsn-and-connection-string-keywords>`_.

    This example requires the `Microsoft ODBC Driver for SQL Server
    <https://learn.microsoft.com/en-us/sql/connect/odbc/microsoft-odbc-driver-for-sql-server?view=sql-server-ver16>`_
    for *Windows* in addition to the ``sqlalchemy`` and ``pyodbc`` packages for
    Python.

    >>> import streamlit as st
    >>>
    >>> conn = st.connection(
    ...     "sql",
    ...     dialect="mssql",
    ...     driver="pyodbc",
    ...     host="xxx.database.windows.net",
    ...     database="xxx",
    ...     username="xxx",
    ...     query={
    ...         "driver": "ODBC Driver 18 for SQL Server",
    ...         "authentication": "ActiveDirectoryInteractive",
    ...         "encrypt": "yes",
    ...     },
    ... )
    >>>
    >>> df = conn.query("SELECT * FROM pet_owners")
    >>> st.dataframe(df)

    """

    def _connect(self, autocommit: bool = False, **kwargs) -> Engine:
        """Create the SQLAlchemy Engine backing this connection.

        Connection parameters named in ``_ALL_CONNECTION_PARAMS`` are looked up
        in ``kwargs`` first and in this connection's secrets second. Remaining
        keyword arguments are forwarded to ``sqlalchemy.create_engine()``, with
        the ``create_engine_kwargs`` secrets entry as a fallback.

        Parameters
        ----------
        autocommit : bool
            If True, return the engine with ``isolation_level="AUTOCOMMIT"``
            execution options applied. The default is False.
        **kwargs : dict
            Connection parameters and/or ``sqlalchemy.create_engine()`` keyword
            arguments.

        Returns
        -------
        sqlalchemy.engine.base.Engine
            The newly created engine.

        Raises
        ------
        StreamlitAPIException
            If no connection parameters are configured at all, or if ``url`` is
            absent and one of ``dialect``, ``username`` or ``host`` is missing.

        """
        import sqlalchemy

        # Work on a copy: extracting the connection params below removes them
        # from the dict it is given.
        kwargs = deepcopy(kwargs)
        conn_param_kwargs = extract_from_dict(_ALL_CONNECTION_PARAMS, kwargs)
        # ChainMap resolves lookups left to right, so kwargs override secrets.
        conn_params = ChainMap(conn_param_kwargs, self._secrets.to_dict())

        if not conn_params:
            raise StreamlitAPIException(
                "Missing SQL DB connection configuration. "
                "Did you forget to set this in `secrets.toml` or as kwargs to `st.connection`?"
            )

        if "url" in conn_params:
            url = sqlalchemy.engine.make_url(conn_params["url"])
        else:
            # Sort so that the reported parameter does not depend on set
            # iteration order when several required params are missing.
            missing = sorted(
                p for p in _REQUIRED_CONNECTION_PARAMS if p not in conn_params
            )
            if missing:
                raise StreamlitAPIException(
                    f"Missing SQL DB connection param: {missing[0]}"
                )

            drivername = conn_params["dialect"] + (
                f"+{conn_params['driver']}" if "driver" in conn_params else ""
            )
            url = sqlalchemy.engine.URL.create(
                drivername=drivername,
                username=conn_params["username"],
                password=conn_params.get("password"),
                host=conn_params["host"],
                port=int(conn_params["port"]) if "port" in conn_params else None,
                database=conn_params.get("database"),
                query=conn_params.get("query"),
            )

        create_engine_kwargs = ChainMap(
            kwargs, self._secrets.get("create_engine_kwargs", {})
        )
        eng = sqlalchemy.create_engine(url, **create_engine_kwargs)

        if autocommit:
            return cast("Engine", eng.execution_options(isolation_level="AUTOCOMMIT"))
        return cast("Engine", eng)

    def query(
        self,
        sql: str,
        *,  # keyword-only arguments:
        show_spinner: bool | str = "Running `sql.query(...)`.",
        ttl: float | int | timedelta | None = None,
        index_col: str | list[str] | None = None,
        chunksize: int | None = None,
        params=None,
        **kwargs,
    ) -> DataFrame:
        """Run a read-only query.

        This method implements query result caching and simple error
        handling/retries. The caching behavior is identical to that of using
        ``@st.cache_data``.

        .. note::
            Queries that are run without a specified ttl are cached indefinitely.

        All keyword arguments passed to this function are passed down to
        |pandas.read_sql|_, except ``show_spinner`` and ``ttl``.

        Parameters
        ----------
        sql : str
            The read-only SQL query to execute.
        show_spinner : boolean or string
            Enable the spinner. The default is to show a spinner when there is a
            "cache miss" and the cached resource is being created. If a string, the value
            of the show_spinner param will be used for the spinner text.
        ttl : float, int, timedelta or None
            The maximum number of seconds to keep results in the cache, or
            None if cached results should not expire. The default is None.
        index_col : str, list of str, or None
            Column(s) to set as index(MultiIndex). Default is None.
        chunksize : int or None
            If specified, return an iterator where chunksize is the number of
            rows to include in each chunk. Default is None.
        params : list, tuple, dict or None
            List of parameters to pass to the execute method. The syntax used to pass
            parameters is database driver dependent. Check your database driver
            documentation for which of the five syntax styles, described in `PEP 249
            paramstyle <https://peps.python.org/pep-0249/#paramstyle>`_, is supported.
            Default is None.
        **kwargs: dict
            Additional keyword arguments are passed to |pandas.read_sql|_.

        .. |pandas.read_sql| replace:: ``pandas.read_sql``
        .. _pandas.read_sql: https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html

        Returns
        -------
        pandas.DataFrame
            The result of running the query, formatted as a pandas DataFrame.

        Example
        -------
        >>> import streamlit as st
        >>>
        >>> conn = st.connection("sql")
        >>> df = conn.query(
        ...     "SELECT * FROM pet_owners WHERE owner = :owner",
        ...     ttl=3600,
        ...     params={"owner": "barbara"},
        ... )
        >>> st.dataframe(df)

        """
        from sqlalchemy import text
        from sqlalchemy.exc import DatabaseError, InternalError, OperationalError
        from tenacity import (
            retry,
            retry_if_exception_type,
            stop_after_attempt,
            wait_fixed,
        )

        @retry(
            after=lambda _: self.reset(),
            stop=stop_after_attempt(3),
            reraise=True,
            retry=retry_if_exception_type(
                (DatabaseError, InternalError, OperationalError)
            ),
            wait=wait_fixed(1),
        )
        def _query(
            sql: str,
            index_col=None,
            chunksize=None,
            params=None,
            **kwargs,
        ) -> DataFrame:
            import pandas as pd

            connection = self._instance.connect()
            # With ``chunksize`` pandas hands back a lazy iterator that keeps
            # reading from the connection after we return, so only in that case
            # must the connection outlive this call. In every other case
            # (including any failure) it is closed here instead of being left
            # for the garbage collector.
            keep_open = False
            try:
                result = pd.read_sql(
                    text(sql),
                    connection,
                    index_col=index_col,
                    chunksize=chunksize,
                    params=params,
                    **kwargs,
                )
                keep_open = chunksize is not None
                return result
            finally:
                if not keep_open:
                    connection.close()

        # We modify our helper function's `__qualname__` here to work around default
        # `@st.cache_data` behavior. Otherwise, `.query()` being called with different
        # `ttl` values will reset the cache with each call, and the query caches won't
        # be scoped by connection.
        ttl_str = str(  # Avoid adding extra `.` characters to `__qualname__`
            ttl
        ).replace(".", "_")
        _query.__qualname__ = f"{_query.__qualname__}_{self._connection_name}_{ttl_str}"
        _query = cache_data(
            show_spinner=show_spinner,
            ttl=ttl,
        )(_query)

        return _query(
            sql,
            index_col=index_col,
            chunksize=chunksize,
            params=params,
            **kwargs,
        )

    def connect(self) -> SQLAlchemyConnection:
        """Return a new connection from the underlying SQLAlchemy Engine.

        Calling this method is equivalent to calling ``self._instance.connect()``.

        NOTE: This method should not be confused with the internal ``_connect`` method used
        to implement a Streamlit Connection.

        Returns
        -------
        sqlalchemy.engine.Connection
            A new SQLAlchemy connection object.

        """
        return self._instance.connect()

    @property
    def engine(self) -> Engine:
        """The underlying SQLAlchemy Engine.

        This is equivalent to accessing ``self._instance``.

        Returns
        -------
        sqlalchemy.engine.base.Engine
            The underlying SQLAlchemy Engine.

        """
        return self._instance

    @property
    def driver(self) -> str:
        """The name of the driver used by the underlying SQLAlchemy Engine.

        This is equivalent to accessing ``self._instance.driver``.

        Returns
        -------
        str
            The name of the driver. For example, ``"pyodbc"`` or ``"psycopg2"``.

        """
        return cast("str", self._instance.driver)

    @property
    def session(self) -> Session:
        """Return a SQLAlchemy Session.

        Every access of this property constructs a new Session bound to the
        underlying Engine.

        Users of this connection should use the contextmanager pattern for writes,
        transactions, and anything more complex than simple read queries.

        See the usage example below, which assumes we have a table ``numbers`` with a
        single integer column ``val``. The `SQLAlchemy
        <https://docs.sqlalchemy.org/en/20/orm/session_basics.html>`_ docs also contain
        much more information on the usage of sessions.

        Returns
        -------
        sqlalchemy.orm.Session
            A SQLAlchemy Session.

        Example
        -------
        >>> import streamlit as st
        >>> from sqlalchemy import text
        >>>
        >>> conn = st.connection("sql")
        >>> n = st.slider("Pick a number")
        >>> if st.button("Add the number!"):
        ...     with conn.session as session:
        ...         session.execute(
        ...             text("INSERT INTO numbers (val) VALUES (:n);"), {"n": n}
        ...         )
        ...         session.commit()

        """
        from sqlalchemy.orm import Session

        return Session(self._instance)

View File

@@ -0,0 +1,97 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE: We won't always be able to import from snowflake.connector.connection so need the
# `type: ignore` comment below, but that comment will explode if `warn-unused-ignores` is
# turned on when the package is available. Unfortunately, mypy doesn't provide a good
# way to configure this at a per-line level :(
# mypy: no-warn-unused-ignores
from __future__ import annotations
import os
from typing import TYPE_CHECKING, Any, cast
if TYPE_CHECKING:
from collections.abc import Collection
# Location of the SnowSQL config file read by ``load_from_snowsql_config_file``,
# which expands the leading ``~`` before use.
SNOWSQL_CONNECTION_FILE = "~/.snowsql/config"
def extract_from_dict(
    keys: Collection[str], source_dict: dict[str, Any]
) -> dict[str, Any]:
    """Move the requested keys out of source_dict into a newly created dict.

    Parameters
    ----------
    keys : Collection[str]
        The keys to move. Keys that are absent from source_dict are ignored.
    source_dict : Dict[str, Any]
        The dict to take entries from. It is mutated: every key that gets
        extracted is removed from it.

    Returns
    -------
    Dict[str, Any]
        A new dict holding the extracted key/value pairs, in the order the
        keys were iterated.

    """
    # The membership test runs right before each pop, so a key that appears
    # twice in ``keys`` is extracted once and skipped the second time.
    return {
        wanted: source_dict.pop(wanted) for wanted in keys if wanted in source_dict
    }
def load_from_snowsql_config_file(connection_name: str) -> dict[str, Any]:
    """Read connection parameters for connection_name from the SnowSQL config file.

    The ``connections.<connection_name>`` section is preferred; if it is absent,
    the plain ``connections`` section is used instead.

    Parameters
    ----------
    connection_name : str
        The name used to look up the ``connections.<connection_name>`` section.

    Returns
    -------
    Dict[str, Any]
        The section's options with every ``name`` substring removed from the
        keys, surrounding double quotes stripped from the values, and a ``db``
        key renamed to ``database``. An empty dict is returned when the config
        file or both sections are missing.

    """
    config_path = os.path.expanduser(SNOWSQL_CONNECTION_FILE)
    if not os.path.exists(config_path):
        return {}

    # Lazy-load config parser for better import / startup performance
    import configparser

    parser = configparser.ConfigParser(inline_comment_prefixes="#")
    parser.read(config_path)

    # Candidate sections in priority order: connection-specific first, then
    # the generic one.
    candidates = (f"connections.{connection_name}", "connections")
    section_name = next((name for name in candidates if name in parser), None)
    if section_name is None:
        return {}

    conn_params = {}
    for option, raw_value in parser[section_name].items():
        conn_params[option.replace("name", "")] = raw_value.strip('"')

    if "db" in conn_params:
        conn_params["database"] = conn_params.pop("db")

    return conn_params
def running_in_sis() -> bool:
    """Report whether this app is running in SiS.

    Returns
    -------
    bool
        The result of Snowpark's ``is_in_stored_procedure()``, or False when
        importing it raises ``ModuleNotFoundError``.

    """
    try:
        from snowflake.snowpark._internal.utils import (  # type: ignore[import]  # isort: skip
            is_in_stored_procedure,
        )

        # The call stays inside the ``try`` so that a ModuleNotFoundError raised
        # while it runs is handled the same way as a failed import.
        in_sis = is_in_stored_procedure()
    except ModuleNotFoundError:
        in_sis = False
    return cast("bool", in_sis)